From 89382c3de5bc2250b1dad1c42c1f73d5ec6febda Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Mon, 7 Aug 2017 12:58:40 +0200 Subject: sockets: factor out a new try_bind() function A refactoring step to prepare for the problem exposed by the test-listen test in the previous commit. Simplify and reorganize the IPv6 specific extra measures and move it out of the for loop to increase code readability. No semantic changes. Signed-off-by: Knut Omang Reviewed-by: Daniel P. Berrange Signed-off-by: Daniel P. Berrange --- util/qemu-sockets.c | 69 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index d149383..211ebad 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -149,6 +149,44 @@ int inet_ai_family_from_address(InetSocketAddress *addr, return PF_UNSPEC; } +static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) +{ +#ifndef IPV6_V6ONLY + return bind(socket, e->ai_addr, e->ai_addrlen); +#else + /* + * Deals with first & last cases in matrix in comment + * for inet_ai_family_from_address(). + */ + int v6only = + ((!saddr->has_ipv4 && !saddr->has_ipv6) || + (saddr->has_ipv4 && saddr->ipv4 && + saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1; + int stat; + + rebind: + if (e->ai_family == PF_INET6) { + qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, + sizeof(v6only)); + } + + stat = bind(socket, e->ai_addr, e->ai_addrlen); + if (!stat) { + return 0; + } + + /* If we got EADDRINUSE from an IPv6 bind & v6only is unset, + * it could be that the IPv4 port is already claimed, so retry + * with v6only set + */ + if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { + v6only = 1; + goto rebind; + } + return stat; +#endif +} + static int inet_listen_saddr(InetSocketAddress *saddr, int port_offset, bool update_addr, @@ -228,39 +266,10 @@ static int inet_listen_saddr(InetSocketAddress *saddr, port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { -#ifdef IPV6_V6ONLY - /* - * Deals with first & last cases in matrix in comment - * for inet_ai_family_from_address(). - */ - int v6only = - ((!saddr->has_ipv4 && !saddr->has_ipv6) || - (saddr->has_ipv4 && saddr->ipv4 && - saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1; -#endif inet_setport(e, p); -#ifdef IPV6_V6ONLY - rebind: - if (e->ai_family == PF_INET6) { - qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, - sizeof(v6only)); - } -#endif - if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) { + if (try_bind(slisten, saddr, e) >= 0) { goto listen; } - -#ifdef IPV6_V6ONLY - /* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset, - * it could be that the IPv4 port is already claimed, so retry - * with V6ONLY set - */ - if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { - v6only = 1; - goto rebind; - } -#endif - if (p == port_max) { if (!e->ai_next) { error_setg_errno(errp, errno, "Failed to bind socket"); -- cgit v1.1 From 39f80521df1e7f1252960d1ada2bd1a41d4d2cd3 Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Mon, 7 Aug 2017 12:58:41 +0200 Subject: sockets: factor out create_fast_reuse_socket Another refactoring step to prepare for fixing the problem exposed with the test-listen test in the previous commit Signed-off-by: Knut Omang Reviewed-by: Daniel P. Berrange Signed-off-by: Daniel P. Berrange --- util/qemu-sockets.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 211ebad..65f6dcd 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -149,6 +149,16 @@ int inet_ai_family_from_address(InetSocketAddress *addr, return PF_UNSPEC; } +static int create_fast_reuse_socket(struct addrinfo *e) +{ + int slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); + if (slisten < 0) { + return -1; + } + socket_set_fast_reuse(slisten); + return slisten; +} + static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) { #ifndef IPV6_V6ONLY @@ -253,7 +263,8 @@ static int inet_listen_saddr(InetSocketAddress *saddr, getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen, uaddr,INET6_ADDRSTRLEN,uport,32, NI_NUMERICHOST | NI_NUMERICSERV); - slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); + + slisten = create_fast_reuse_socket(e); if (slisten < 0) { if (!e->ai_next) { error_setg_errno(errp, errno, "Failed to create socket"); @@ -261,8 +272,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr, continue; } - socket_set_fast_reuse(slisten); - port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { -- cgit v1.1 From 9cf961bba74d433db76a110917ac70aecc2ebcc4 Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Mon, 7 Aug 2017 12:58:42 +0200 Subject: sockets: Handle race condition between binds to the same port If an offset of ports is specified to the inet_listen_saddr function(), and two or more processes tries to bind from these ports at the same time, occasionally more than one process may be able to bind to the same port. The condition is detected by listen() but too late to avoid a failure. This function is called by socket_listen() and used by all socket listening code in QEMU, so all cases where any form of dynamic port selection is used should be subject to this issue. Add code to close and re-establish the socket when this condition is observed, hiding the race condition from the user. Also clean up some issues with error handling to allow more accurate reporting of the cause of an error. This has been developed and tested by means of the test-listen unit test in the previous commit. Enable the test for make check now that it passes. Reviewed-by: Bhavesh Davda Reviewed-by: Yuval Shaia Reviewed-by: Girish Moodalbail Signed-off-by: Knut Omang Reviewed-by: Daniel P. Berrange Signed-off-by: Daniel P. Berrange --- util/qemu-sockets.c | 58 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 65f6dcd..b47fb45 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -206,7 +206,10 @@ static int inet_listen_saddr(InetSocketAddress *saddr, char port[33]; char uaddr[INET6_ADDRSTRLEN+1]; char uport[33]; - int slisten, rc, port_min, port_max, p; + int rc, port_min, port_max, p; + int slisten = 0; + int saved_errno = 0; + bool socket_created = false; Error *err = NULL; memset(&ai,0, sizeof(ai)); @@ -258,7 +261,7 @@ static int inet_listen_saddr(InetSocketAddress *saddr, return -1; } - /* create socket + bind */ + /* create socket + bind/listen */ for (e = res; e != NULL; e = e->ai_next) { getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen, uaddr,INET6_ADDRSTRLEN,uport,32, @@ -266,37 +269,58 @@ static int inet_listen_saddr(InetSocketAddress *saddr, slisten = create_fast_reuse_socket(e); if (slisten < 0) { - if (!e->ai_next) { - error_setg_errno(errp, errno, "Failed to create socket"); - } continue; } + socket_created = true; port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { inet_setport(e, p); - if (try_bind(slisten, saddr, e) >= 0) { - goto listen; - } - if (p == port_max) { - if (!e->ai_next) { + rc = try_bind(slisten, saddr, e); + if (rc) { + if (errno == EADDRINUSE) { + continue; + } else { error_setg_errno(errp, errno, "Failed to bind socket"); + goto listen_failed; } } + if (!listen(slisten, 1)) { + goto listen_ok; + } + if (errno != EADDRINUSE) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto listen_failed; + } + /* Someone else managed to bind to the same port and beat us + * to listen on it! Socket semantics does not allow us to + * recover from this situation, so we need to recreate the + * socket to allow bind attempts for subsequent ports: + */ + closesocket(slisten); + slisten = create_fast_reuse_socket(e); + if (slisten < 0) { + error_setg_errno(errp, errno, + "Failed to recreate failed listening socket"); + goto listen_failed; + } } + } + error_setg_errno(errp, errno, + socket_created ? + "Failed to find an available port" : + "Failed to create a socket"); +listen_failed: + saved_errno = errno; + if (slisten >= 0) { closesocket(slisten); } freeaddrinfo(res); + errno = saved_errno; return -1; -listen: - if (listen(slisten,1) != 0) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - closesocket(slisten); - freeaddrinfo(res); - return -1; - } +listen_ok: if (update_addr) { g_free(saddr->host); saddr->host = g_strdup(uaddr); -- cgit v1.1 From a7b20a8efa28e5f22c26c06cd06c2f12bc863493 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 14:43:42 +0100 Subject: io: monitor encoutput buffer size from websocket GSource The websocket GSource is monitoring the size of the rawoutput buffer to determine if the channel can accepts more writes. The rawoutput buffer, however, is merely a temporary staging buffer before data is copied into the encoutput buffer. Thus its size will always be zero when the GSource runs. This flaw causes the encoutput buffer to grow without bound if the other end of the underlying data channel doesn't read data being sent. This can be seen with VNC if a client is on a slow WAN link and the guest OS is sending many screen updates. A malicious VNC client can act like it is on a slow link by playing a video in the guest and then reading data very slowly, causing QEMU host memory to expand arbitrarily. This issue is assigned CVE-2017-15268, publically reported in https://bugs.launchpad.net/qemu/+bug/1718964 Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io/channel-websock.c b/io/channel-websock.c index d1d471f..04bcc05 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -28,7 +28,7 @@ #include -/* Max amount to allow in rawinput/rawoutput buffers */ +/* Max amount to allow in rawinput/encoutput buffers */ #define QIO_CHANNEL_WEBSOCK_MAX_BUFFER 8192 #define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24 @@ -1208,7 +1208,7 @@ qio_channel_websock_source_check(GSource *source) if (wsource->wioc->rawinput.offset || wsource->wioc->io_eof) { cond |= G_IO_IN; } - if (wsource->wioc->rawoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { + if (wsource->wioc->encoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { cond |= G_IO_OUT; } -- cgit v1.1 From 57b0cdf152b7266e68bfa3e84635d4bdb64ef2cd Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 15:34:06 +0100 Subject: io: simplify websocket ping reply handling We must ensure we don't get flooded with ping replies if the outbound channel is slow. Currently we do this by keeping the ping reply in a separate temporary buffer and only writing it if the encoutput buffer is completely empty. This is overly pessimistic, as it is reasonable to add a ping reply to the encoutput buffer even if it has previous data in it, as long as that previous data doesn't include a ping reply. To track this better, put the ping reply directly into the encoutput buffer, and then record the size of encoutput at this time in pong_remain. As we write encoutput to the underlying channel, we can decrement the pong_remain counter. Once it hits zero, we can accept further ping replies for transmission. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- include/io/channel-websock.h | 2 +- io/channel-websock.c | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/io/channel-websock.h b/include/io/channel-websock.h index ff32d86..3762707 100644 --- a/include/io/channel-websock.h +++ b/include/io/channel-websock.h @@ -60,8 +60,8 @@ struct QIOChannelWebsock { Buffer encoutput; Buffer rawinput; Buffer rawoutput; - Buffer ping_reply; size_t payload_remain; + size_t pong_remain; QIOChannelWebsockMask mask; guint io_tag; Error *io_err; diff --git a/io/channel-websock.c b/io/channel-websock.c index 04bcc05..6083f74 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -825,11 +825,14 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, } return -1; } else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_PING) { - /* ping frames produce an immediate reply */ - buffer_reset(&ioc->ping_reply); - qio_channel_websock_encode_buffer( - ioc, &ioc->ping_reply, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, - &ioc->encinput); + /* ping frames produce an immediate reply, as long as we've not still + * got a previous pong queued, in which case we drop the new pong */ + if (ioc->pong_remain == 0) { + qio_channel_websock_encode_buffer( + ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, + &ioc->encinput); + ioc->pong_remain = ioc->encoutput.offset; + } } /* pong frames are ignored */ if (payload_len) { @@ -888,7 +891,6 @@ static void qio_channel_websock_finalize(Object *obj) buffer_free(&ioc->encoutput); buffer_free(&ioc->rawinput); buffer_free(&ioc->rawoutput); - buffer_free(&ioc->ping_reply); object_unref(OBJECT(ioc->master)); if (ioc->io_tag) { g_source_remove(ioc->io_tag); @@ -946,12 +948,7 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, ssize_t ret; ssize_t done = 0; - /* ping replies take priority over binary data */ - if (!ioc->ping_reply.offset) { - qio_channel_websock_encode(ioc); - } else if (!ioc->encoutput.offset) { - buffer_move_empty(&ioc->encoutput, &ioc->ping_reply); - } + qio_channel_websock_encode(ioc); while (ioc->encoutput.offset > 0) { ret = qio_channel_write(ioc->master, @@ -968,6 +965,11 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, } buffer_advance(&ioc->encoutput, ret); done += ret; + if (ioc->pong_remain < ret) { + ioc->pong_remain = 0; + } else { + ioc->pong_remain -= ret; + } } return done; } @@ -1026,7 +1028,7 @@ static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc) return; } - if (ioc->encoutput.offset || ioc->ping_reply.offset) { + if (ioc->encoutput.offset) { cond |= G_IO_OUT; } if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER && -- cgit v1.1 From bac6c95415788c03590542eb244c723a18d0771c Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 14:39:17 +0100 Subject: io: get rid of qio_channel_websock_encode helper method The qio_channel_websock_encode method is only used in one place, everything else calls qio_channel_websock_encode_buffer directly. It can also be pushed up a level into the qio_channel_websock_writev method, since every other caller of qio_channel_websock_write_wire has already filled encoutput. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/io/channel-websock.c b/io/channel-websock.c index 6083f74..700f5be 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -616,18 +616,6 @@ static void qio_channel_websock_encode_buffer(QIOChannelWebsock *ioc, } -static void qio_channel_websock_encode(QIOChannelWebsock *ioc) -{ - if (!ioc->rawoutput.offset) { - return; - } - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, - &ioc->rawoutput); - buffer_reset(&ioc->rawoutput); -} - - static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **); @@ -948,8 +936,6 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, ssize_t ret; ssize_t done = 0; - qio_channel_websock_encode(ioc); - while (ioc->encoutput.offset > 0) { ret = qio_channel_write(ioc->master, (char *)ioc->encoutput.buffer, @@ -1134,6 +1120,12 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, } done: + if (ioc->rawoutput.offset) { + qio_channel_websock_encode_buffer( + ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, + &ioc->rawoutput); + buffer_reset(&ioc->rawoutput); + } ret = qio_channel_websock_write_wire(wioc, errp); if (ret < 0 && ret != QIO_CHANNEL_ERR_BLOCK) { -- cgit v1.1 From fb74e5903914b9ec8c80b6f7a35da000f9f92ae7 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 16:33:20 +0100 Subject: io: pass a struct iovec into qio_channel_websock_encode Instead of requiring use of another Buffer, pass a struct iovec into qio_channel_websock_encode, which gives callers more flexibility in how they process data. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 71 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/io/channel-websock.c b/io/channel-websock.c index 700f5be..93c06f5 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -582,37 +582,48 @@ static gboolean qio_channel_websock_handshake_io(QIOChannel *ioc, } -static void qio_channel_websock_encode_buffer(QIOChannelWebsock *ioc, - Buffer *output, - uint8_t opcode, Buffer *buffer) +static void qio_channel_websock_encode(QIOChannelWebsock *ioc, + uint8_t opcode, + const struct iovec *iov, + size_t niov, + size_t size) { size_t header_size; + size_t i; union { char buf[QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT]; QIOChannelWebsockHeader ws; } header; + assert(size <= iov_size(iov, niov)); + header.ws.b0 = QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN | (opcode & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE); - if (buffer->offset < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) { - header.ws.b1 = (uint8_t)buffer->offset; + if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) { + header.ws.b1 = (uint8_t)size; header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT; - } else if (buffer->offset < - QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) { + } else if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) { header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT; - header.ws.u.s16.l16 = cpu_to_be16((uint16_t)buffer->offset); + header.ws.u.s16.l16 = cpu_to_be16((uint16_t)size); header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT; } else { header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT; - header.ws.u.s64.l64 = cpu_to_be64(buffer->offset); + header.ws.u.s64.l64 = cpu_to_be64(size); header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT; } header_size -= QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK; - trace_qio_channel_websock_encode(ioc, opcode, header_size, buffer->offset); - buffer_reserve(output, header_size + buffer->offset); - buffer_append(output, header.buf, header_size); - buffer_append(output, buffer->buffer, buffer->offset); + trace_qio_channel_websock_encode(ioc, opcode, header_size, size); + buffer_reserve(&ioc->encoutput, header_size + size); + buffer_append(&ioc->encoutput, header.buf, header_size); + for (i = 0; i < niov && size != 0; i++) { + size_t want = iov[i].iov_len; + if (want > size) { + want = size; + } + buffer_append(&ioc->encoutput, iov[i].iov_base, want); + size -= want; + } } @@ -622,6 +633,7 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **); static void qio_channel_websock_write_close(QIOChannelWebsock *ioc, uint16_t code, const char *reason) { + struct iovec iov; buffer_reserve(&ioc->rawoutput, 2 + (reason ? strlen(reason) : 0)); *(uint16_t *)(ioc->rawoutput.buffer + ioc->rawoutput.offset) = cpu_to_be16(code); @@ -629,9 +641,10 @@ static void qio_channel_websock_write_close(QIOChannelWebsock *ioc, if (reason) { buffer_append(&ioc->rawoutput, reason, strlen(reason)); } - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, - &ioc->rawoutput); + iov.iov_base = ioc->rawoutput.buffer; + iov.iov_len = ioc->rawoutput.offset; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + &iov, 1, iov.iov_len); buffer_reset(&ioc->rawoutput); qio_channel_websock_write_wire(ioc, NULL); qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); @@ -801,9 +814,10 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, error_setg(errp, "websocket closed by peer"); if (payload_len) { /* echo client status */ - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, - &ioc->encinput); + struct iovec iov = { .iov_base = ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + &iov, 1, iov.iov_len); qio_channel_websock_write_wire(ioc, NULL); qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } else { @@ -816,9 +830,10 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, /* ping frames produce an immediate reply, as long as we've not still * got a previous pong queued, in which case we drop the new pong */ if (ioc->pong_remain == 0) { - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, - &ioc->encinput); + struct iovec iov = { .iov_base = ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, + &iov, 1, iov.iov_len); ioc->pong_remain = ioc->encoutput.offset; } } /* pong frames are ignored */ @@ -1120,11 +1135,13 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, } done: - if (ioc->rawoutput.offset) { - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, - &ioc->rawoutput); - buffer_reset(&ioc->rawoutput); + if (wioc->rawoutput.offset) { + struct iovec iov = { .iov_base = wioc->rawoutput.buffer, + .iov_len = wioc->rawoutput.offset }; + qio_channel_websock_encode(wioc, + QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, + &iov, 1, iov.iov_len); + buffer_reset(&wioc->rawoutput); } ret = qio_channel_websock_write_wire(wioc, errp); if (ret < 0 && -- cgit v1.1 From 8dfd5f96515ca20c4eb109cb0ee28e2bb32fc505 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 16:54:07 +0100 Subject: io: get rid of bounce buffering in websock write path Currently most outbound I/O on the websock channel gets copied into the rawoutput buffer, and then immediately copied again into the encoutput buffer, with a header prepended. Now that qio_channel_websock_encode accepts a struct iovec, we can trivially remove this bounce buffering and write directly to encoutput. In doing so, we also now correctly validate the encoutput size against the QIO_CHANNEL_WEBSOCK_MAX_BUFFER limit. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- include/io/channel-websock.h | 1 - io/channel-websock.c | 64 +++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 37 deletions(-) diff --git a/include/io/channel-websock.h b/include/io/channel-websock.h index 3762707..a7e5e92 100644 --- a/include/io/channel-websock.h +++ b/include/io/channel-websock.h @@ -59,7 +59,6 @@ struct QIOChannelWebsock { Buffer encinput; Buffer encoutput; Buffer rawinput; - Buffer rawoutput; size_t payload_remain; size_t pong_remain; QIOChannelWebsockMask mask; diff --git a/io/channel-websock.c b/io/channel-websock.c index 93c06f5..e82b1be 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -24,6 +24,7 @@ #include "io/channel-websock.h" #include "crypto/hash.h" #include "trace.h" +#include "qemu/iov.h" #include @@ -633,19 +634,22 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **); static void qio_channel_websock_write_close(QIOChannelWebsock *ioc, uint16_t code, const char *reason) { - struct iovec iov; - buffer_reserve(&ioc->rawoutput, 2 + (reason ? strlen(reason) : 0)); - *(uint16_t *)(ioc->rawoutput.buffer + ioc->rawoutput.offset) = - cpu_to_be16(code); - ioc->rawoutput.offset += 2; + struct iovec iov[2] = { + { .iov_base = &code, .iov_len = sizeof(code) }, + }; + size_t niov = 1; + size_t size = iov[0].iov_len; + + cpu_to_be16s(&code); + if (reason) { - buffer_append(&ioc->rawoutput, reason, strlen(reason)); + iov[1].iov_base = (void *)reason; + iov[1].iov_len = strlen(reason); + size += iov[1].iov_len; + niov++; } - iov.iov_base = ioc->rawoutput.buffer; - iov.iov_len = ioc->rawoutput.offset; qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, - &iov, 1, iov.iov_len); - buffer_reset(&ioc->rawoutput); + iov, niov, size); qio_channel_websock_write_wire(ioc, NULL); qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } @@ -893,7 +897,6 @@ static void qio_channel_websock_finalize(Object *obj) buffer_free(&ioc->encinput); buffer_free(&ioc->encoutput); buffer_free(&ioc->rawinput); - buffer_free(&ioc->rawoutput); object_unref(OBJECT(ioc->master)); if (ioc->io_tag) { g_source_remove(ioc->io_tag); @@ -1103,8 +1106,8 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, Error **errp) { QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); - size_t i; - ssize_t done = 0; + ssize_t want = iov_size(iov, niov); + ssize_t avail; ssize_t ret; if (wioc->io_err) { @@ -1117,32 +1120,21 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, return -1; } - for (i = 0; i < niov; i++) { - size_t want = iov[i].iov_len; - if ((want + wioc->rawoutput.offset) > QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { - want = (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->rawoutput.offset); - } - if (want == 0) { - goto done; - } - - buffer_reserve(&wioc->rawoutput, want); - buffer_append(&wioc->rawoutput, iov[i].iov_base, want); - done += want; - if (want < iov[i].iov_len) { - break; - } + avail = wioc->encoutput.offset >= QIO_CHANNEL_WEBSOCK_MAX_BUFFER ? + 0 : (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->encoutput.offset); + if (want > avail) { + want = avail; } - done: - if (wioc->rawoutput.offset) { - struct iovec iov = { .iov_base = wioc->rawoutput.buffer, - .iov_len = wioc->rawoutput.offset }; + if (want) { qio_channel_websock_encode(wioc, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, - &iov, 1, iov.iov_len); - buffer_reset(&wioc->rawoutput); + iov, niov, want); } + + /* Even if want == 0, we'll try write_wire in case there's + * pending data we could usefully flush out + */ ret = qio_channel_websock_write_wire(wioc, errp); if (ret < 0 && ret != QIO_CHANNEL_ERR_BLOCK) { @@ -1152,11 +1144,11 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, qio_channel_websock_set_watch(wioc); - if (done == 0) { + if (want == 0) { return QIO_CHANNEL_ERR_BLOCK; } - return done; + return want; } static int qio_channel_websock_set_blocking(QIOChannel *ioc, -- cgit v1.1 From 6d5d23b00709510d55711661c7ca41408fd9934e Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 9 Oct 2017 17:52:28 +0100 Subject: io: cope with websock 'Connection' header having multiple values The noVNC server sends a header "Connection: keep-alive, Upgrade" which fails our simple equality test. Split the header on ',', trim whitespace and then check for 'upgrade' token. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/io/channel-websock.c b/io/channel-websock.c index e82b1be..0354845 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -374,6 +374,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, size_t nhdrs = G_N_ELEMENTS(hdrs); const char *protocols = NULL, *version = NULL, *key = NULL, *host = NULL, *connection = NULL, *upgrade = NULL; + char **connectionv; + bool upgraded = false; + size_t i; nhdrs = qio_channel_websock_extract_headers(ioc, buffer, hdrs, nhdrs, errp); if (!nhdrs) { @@ -440,7 +443,16 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, goto bad_request; } - if (strcasecmp(connection, QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) != 0) { + connectionv = g_strsplit(connection, ",", 0); + for (i = 0; connectionv != NULL && connectionv[i] != NULL; i++) { + g_strstrip(connectionv[i]); + if (strcasecmp(connectionv[i], + QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) == 0) { + upgraded = true; + } + } + g_strfreev(connectionv); + if (!upgraded) { error_setg(errp, "No connection upgrade requested '%s'", connection); goto bad_request; } -- cgit v1.1 From 0efd6c9ec19a1ea6c413424fbea54e1dfe471026 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 10 Oct 2017 13:28:03 +0100 Subject: io: add trace points for websocket HTTP protocol headers Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 4 ++++ io/trace-events | 2 ++ 2 files changed, 6 insertions(+) diff --git a/io/channel-websock.c b/io/channel-websock.c index 0354845..aa35ef3 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -224,6 +224,7 @@ qio_channel_websock_extract_headers(QIOChannelWebsock *ioc, goto bad_request; } *nl = '\0'; + trace_qio_channel_websock_http_greeting(ioc, buffer); tmp = strchr(buffer, ' '); if (!tmp) { @@ -425,6 +426,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, goto bad_request; } + trace_qio_channel_websock_http_request(ioc, protocols, version, + host, connection, upgrade, key); + if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) { error_setg(errp, "No '%s' protocol is supported by client '%s'", QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols); diff --git a/io/trace-events b/io/trace-events index 801b5dc..f70bad7 100644 --- a/io/trace-events +++ b/io/trace-events @@ -48,6 +48,8 @@ qio_channel_websock_handshake_pending(void *ioc, int status) "Websock handshake qio_channel_websock_handshake_reply(void *ioc) "Websock handshake reply ioc=%p" qio_channel_websock_handshake_fail(void *ioc, const char *msg) "Websock handshake fail ioc=%p err=%s" qio_channel_websock_handshake_complete(void *ioc) "Websock handshake complete ioc=%p" +qio_channel_websock_http_greeting(void *ioc, const char *greeting) "Websocket HTTP request ioc=%p greeting='%s'" +qio_channel_websock_http_request(void *ioc, const char *protocols, const char *version, const char *host, const char *connection, const char *upgrade, const char *key) "Websocket HTTP request ioc=%p protocols='%s' version='%s' host='%s' connection='%s' upgrade='%s' key='%s'" qio_channel_websock_header_partial_decode(void *ioc, size_t payloadlen, unsigned char fin, unsigned char opcode, unsigned char has_mask) "Websocket header decoded ioc=%p payload-len=%zu fin=0x%x opcode=0x%x has_mask=0x%x" qio_channel_websock_header_full_decode(void *ioc, size_t headerlen, size_t payloadlen, uint32_t mask) "Websocket header decoded ioc=%p header-len=%zu payload-len=%zu mask=0x%x" qio_channel_websock_payload_decode(void *ioc, uint8_t opcode, size_t payload_remain) "Websocket header decoded ioc=%p opcode=0x%x payload-remain=%zu" -- cgit v1.1 From 7fc3fcefe2fc5966c6aa1ef4f10e9740d8d73bf2 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 11 Oct 2017 16:38:10 +0100 Subject: io: fix mem leak in websock error path Coverity pointed out the 'date' is not free()d in the error path Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange --- io/channel-websock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io/channel-websock.c b/io/channel-websock.c index aa35ef3..df2c3a9 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -341,7 +341,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc, char combined_key[QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + QIO_CHANNEL_WEBSOCK_GUID_LEN + 1]; char *accept = NULL; - char *date = qio_channel_websock_date_str(); + char *date = NULL; g_strlcpy(combined_key, key, QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + 1); g_strlcat(combined_key, QIO_CHANNEL_WEBSOCK_GUID, @@ -360,6 +360,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc, return; } + date = qio_channel_websock_date_str(); qio_channel_websock_handshake_send_res( ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK, date, accept); -- cgit v1.1