From 646c5478c04297485e3e045cd8969d2ae7642004 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Mon, 26 Sep 2016 11:08:21 +0300 Subject: record/replay: add network support This patch adds support of recording and replaying network packets in irount rr mode. Record and replay for network interactions is performed with the network filter. Each backend must have its own instance of the replay filter as follows: -netdev user,id=net1 -device rtl8139,netdev=net1 -object filter-replay,id=replay,netdev=net1 Replay network filter is used to record and replay network packets. While recording the virtual machine this filter puts all packets coming from the outer world into the log. In replay mode packets from the log are injected into the network device. All interactions with network backend in replay mode are disabled. v5 changes: - using iov_to_buf function instead of loop Signed-off-by: Pavel Dovgalyuk Signed-off-by: Jason Wang --- docs/replay.txt | 14 +++++++ include/sysemu/replay.h | 12 ++++++ net/Makefile.objs | 1 + net/filter-replay.c | 92 ++++++++++++++++++++++++++++++++++++++++++ replay/Makefile.objs | 1 + replay/replay-events.c | 11 +++++ replay/replay-internal.h | 10 +++++ replay/replay-net.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ replay/replay.c | 2 +- vl.c | 3 +- 10 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 net/filter-replay.c create mode 100644 replay/replay-net.c diff --git a/docs/replay.txt b/docs/replay.txt index 779c6c0..347b2ff 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -195,3 +195,17 @@ Queue is flushed at checkpoints and information about processed requests is recorded to the log. In replay phase the queue is matched with events read from the log. Therefore block devices requests are processed deterministically. + +Network devices +--------------- + +Record and replay for network interactions is performed with the network filter. +Each backend must have its own instance of the replay filter as follows: + -netdev user,id=net1 -device rtl8139,netdev=net1 + -object filter-replay,id=replay,netdev=net1 + +Replay network filter is used to record and replay network packets. While +recording the virtual machine this filter puts all packets coming from +the outer world into the log. In replay mode packets from the log are +injected into the network device. All interactions with network backend +in replay mode are disabled. diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index f80d6d2..abb35ca 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -39,6 +39,8 @@ enum ReplayCheckpoint { }; typedef enum ReplayCheckpoint ReplayCheckpoint; +typedef struct ReplayNetState ReplayNetState; + extern ReplayMode replay_mode; /* Replay process control functions */ @@ -137,4 +139,14 @@ void replay_char_read_all_save_error(int res); /*! Writes character read_all execution result into the replay log. */ void replay_char_read_all_save_buf(uint8_t *buf, int offset); +/* Network */ + +/*! Registers replay network filter attached to some backend. */ +ReplayNetState *replay_register_net(NetFilterState *nfs); +/*! Unregisters replay network filter. */ +void replay_unregister_net(ReplayNetState *rns); +/*! Called to write network packet to the replay log. */ +void replay_net_packet_event(ReplayNetState *rns, unsigned flags, + const struct iovec *iov, int iovcnt); + #endif diff --git a/net/Makefile.objs b/net/Makefile.objs index 2a80df5..2e2fd43 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -19,3 +19,4 @@ common-obj-y += filter-mirror.o common-obj-y += colo-compare.o common-obj-y += colo.o common-obj-y += filter-rewriter.o +common-obj-y += filter-replay.o diff --git a/net/filter-replay.c b/net/filter-replay.c new file mode 100644 index 0000000..cff65f8 --- /dev/null +++ b/net/filter-replay.c @@ -0,0 +1,92 @@ +/* + * filter-replay.c + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "clients.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/iov.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/visitor.h" +#include "net/filter.h" +#include "sysemu/replay.h" + +#define TYPE_FILTER_REPLAY "filter-replay" + +#define FILTER_REPLAY(obj) \ + OBJECT_CHECK(NetFilterReplayState, (obj), TYPE_FILTER_REPLAY) + +struct NetFilterReplayState { + NetFilterState nfs; + ReplayNetState *rns; +}; +typedef struct NetFilterReplayState NetFilterReplayState; + +static ssize_t filter_replay_receive_iov(NetFilterState *nf, + NetClientState *sndr, + unsigned flags, + const struct iovec *iov, + int iovcnt, NetPacketSent *sent_cb) +{ + NetFilterReplayState *nfrs = FILTER_REPLAY(nf); + switch (replay_mode) { + case REPLAY_MODE_RECORD: + if (nf->netdev == sndr) { + replay_net_packet_event(nfrs->rns, flags, iov, iovcnt); + return iov_size(iov, iovcnt); + } + return 0; + case REPLAY_MODE_PLAY: + /* Drop all packets in replay mode. + Packets from the log will be injected by the replay module. */ + return iov_size(iov, iovcnt); + default: + /* Pass all the packets. */ + return 0; + } +} + +static void filter_replay_instance_init(Object *obj) +{ + NetFilterReplayState *nfrs = FILTER_REPLAY(obj); + nfrs->rns = replay_register_net(&nfrs->nfs); +} + +static void filter_replay_instance_finalize(Object *obj) +{ + NetFilterReplayState *nfrs = FILTER_REPLAY(obj); + replay_unregister_net(nfrs->rns); +} + +static void filter_replay_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->receive_iov = filter_replay_receive_iov; +} + +static const TypeInfo filter_replay_info = { + .name = TYPE_FILTER_REPLAY, + .parent = TYPE_NETFILTER, + .class_init = filter_replay_class_init, + .instance_init = filter_replay_instance_init, + .instance_finalize = filter_replay_instance_finalize, + .instance_size = sizeof(NetFilterReplayState), +}; + +static void filter_replay_register_types(void) +{ + type_register_static(&filter_replay_info); +} + +type_init(filter_replay_register_types); diff --git a/replay/Makefile.objs b/replay/Makefile.objs index c8ad3eb..b2afd40 100644 --- a/replay/Makefile.objs +++ b/replay/Makefile.objs @@ -5,3 +5,4 @@ common-obj-y += replay-time.o common-obj-y += replay-input.o common-obj-y += replay-char.o common-obj-y += replay-snapshot.o +common-obj-y += replay-net.o diff --git a/replay/replay-events.c b/replay/replay-events.c index c513913..94a6dcc 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -54,6 +54,9 @@ static void replay_run_event(Event *event) case REPLAY_ASYNC_EVENT_BLOCK: aio_bh_call(event->opaque); break; + case REPLAY_ASYNC_EVENT_NET: + replay_event_net_run(event->opaque); + break; default: error_report("Replay: invalid async event ID (%d) in the queue", event->event_kind); @@ -189,6 +192,9 @@ static void replay_save_event(Event *event, int checkpoint) case REPLAY_ASYNC_EVENT_BLOCK: replay_put_qword(event->id); break; + case REPLAY_ASYNC_EVENT_NET: + replay_event_net_save(event->opaque); + break; default: error_report("Unknown ID %" PRId64 " of replay event", event->id); exit(1); @@ -252,6 +258,11 @@ static Event *replay_read_event(int checkpoint) read_id = replay_get_qword(); } break; + case REPLAY_ASYNC_EVENT_NET: + event = g_malloc0(sizeof(Event)); + event->event_kind = read_event_kind; + event->opaque = replay_event_net_load(); + return event; default: error_report("Unknown ID %d of replay event", read_event_kind); exit(1); diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 9117e44..c26d079 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -50,6 +50,7 @@ enum ReplayAsyncEventKind { REPLAY_ASYNC_EVENT_INPUT_SYNC, REPLAY_ASYNC_EVENT_CHAR_READ, REPLAY_ASYNC_EVENT_BLOCK, + REPLAY_ASYNC_EVENT_NET, REPLAY_ASYNC_COUNT }; @@ -161,6 +162,15 @@ void replay_event_char_read_save(void *opaque); /*! Reads char event read from the file. */ void *replay_event_char_read_load(void); +/* Network devices */ + +/*! Called to run network event. */ +void replay_event_net_run(void *opaque); +/*! Writes network event to the file. */ +void replay_event_net_save(void *opaque); +/*! Reads network from the file. */ +void *replay_event_net_load(void); + /* VMState-related functions */ /* Registers replay VMState. diff --git a/replay/replay-net.c b/replay/replay-net.c new file mode 100644 index 0000000..80b7054 --- /dev/null +++ b/replay/replay-net.c @@ -0,0 +1,102 @@ +/* + * replay-net.c + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "net/filter.h" +#include "qemu/iov.h" + +struct ReplayNetState { + NetFilterState *nfs; + int id; +}; + +typedef struct NetEvent { + uint8_t id; + uint32_t flags; + uint8_t *data; + size_t size; +} NetEvent; + +static NetFilterState **network_filters; +static int network_filters_count; + +ReplayNetState *replay_register_net(NetFilterState *nfs) +{ + ReplayNetState *rns = g_new0(ReplayNetState, 1); + rns->nfs = nfs; + rns->id = network_filters_count++; + network_filters = g_realloc(network_filters, + network_filters_count + * sizeof(*network_filters)); + network_filters[network_filters_count - 1] = nfs; + return rns; +} + +void replay_unregister_net(ReplayNetState *rns) +{ + network_filters[rns->id] = NULL; + g_free(rns); +} + +void replay_net_packet_event(ReplayNetState *rns, unsigned flags, + const struct iovec *iov, int iovcnt) +{ + NetEvent *event = g_new(NetEvent, 1); + event->flags = flags; + event->data = g_malloc(iov_size(iov, iovcnt)); + event->size = iov_size(iov, iovcnt); + event->id = rns->id; + iov_to_buf(iov, iovcnt, 0, event->data, event->size); + + replay_add_event(REPLAY_ASYNC_EVENT_NET, event, NULL, 0); +} + +void replay_event_net_run(void *opaque) +{ + NetEvent *event = opaque; + struct iovec iov = { + .iov_base = (void *)event->data, + .iov_len = event->size + }; + + assert(event->id < network_filters_count); + + qemu_netfilter_pass_to_next(network_filters[event->id]->netdev, + event->flags, &iov, 1, network_filters[event->id]); + + g_free(event->data); + g_free(event); +} + +void replay_event_net_save(void *opaque) +{ + NetEvent *event = opaque; + + replay_put_byte(event->id); + replay_put_dword(event->flags); + replay_put_array(event->data, event->size); +} + +void *replay_event_net_load(void) +{ + NetEvent *event = g_new(NetEvent, 1); + + event->id = replay_get_byte(); + event->flags = replay_get_dword(); + replay_get_array_alloc(&event->data, &event->size); + + return event; +} diff --git a/replay/replay.c b/replay/replay.c index c797aea..7f27cf1 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -21,7 +21,7 @@ /* Current version of the replay mechanism. Increase it when file format changes. */ -#define REPLAY_VERSION 0xe02004 +#define REPLAY_VERSION 0xe02005 /* Size of replay log header */ #define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) diff --git a/vl.c b/vl.c index d77dd86..a23de18 100644 --- a/vl.c +++ b/vl.c @@ -2859,7 +2859,8 @@ static bool object_create_initial(const char *type) g_str_equal(type, "filter-mirror") || g_str_equal(type, "filter-redirector") || g_str_equal(type, "colo-compare") || - g_str_equal(type, "filter-rewriter")) { + g_str_equal(type, "filter-rewriter") || + g_str_equal(type, "filter-replay")) { return false; } -- cgit v1.1 From 30a3e70167d0b8db9ea057f270cf18d93c153ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Tue, 13 Dec 2016 20:44:41 +0100 Subject: rtl8139: correctly handle PHY reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to datasheet: "[Bit 15 of Basic Mode Control Register] sets the status and control registers of the PHY (register 0062-0074) in a default state. This bit is self-clearing. 1 = software reset; 0 = normal operation." This fixes the netcard detection failure in Minoca OS. Signed-off-by: Hervé Poussineau Signed-off-by: Jason Wang --- hw/net/rtl8139.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index f05e59c..671c7e4 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -1205,6 +1205,20 @@ static void rtl8139_reset_rxring(RTL8139State *s, uint32_t bufferSize) s->RxBufAddr = 0; } +static void rtl8139_reset_phy(RTL8139State *s) +{ + s->BasicModeStatus = 0x7809; + s->BasicModeStatus |= 0x0020; /* autonegotiation completed */ + /* preserve link state */ + s->BasicModeStatus |= qemu_get_queue(s->nic)->link_down ? 0 : 0x04; + + s->NWayAdvert = 0x05e1; /* all modes, full duplex */ + s->NWayLPAR = 0x05e1; /* all modes, full duplex */ + s->NWayExpansion = 0x0001; /* autonegotiation supported */ + + s->CSCR = CSCR_F_LINK_100 | CSCR_HEART_BIT | CSCR_LD; +} + static void rtl8139_reset(DeviceState *d) { RTL8139State *s = RTL8139(d); @@ -1256,25 +1270,14 @@ static void rtl8139_reset(DeviceState *d) s->Config3 = 0x1; /* fast back-to-back compatible */ s->Config5 = 0x0; - s->CSCR = CSCR_F_LINK_100 | CSCR_HEART_BIT | CSCR_LD; - s->CpCmd = 0x0; /* reset C+ mode */ s->cplus_enabled = 0; - // s->BasicModeCtrl = 0x3100; // 100Mbps, full duplex, autonegotiation // s->BasicModeCtrl = 0x2100; // 100Mbps, full duplex s->BasicModeCtrl = 0x1000; // autonegotiation - s->BasicModeStatus = 0x7809; - //s->BasicModeStatus |= 0x0040; /* UTP medium */ - s->BasicModeStatus |= 0x0020; /* autonegotiation completed */ - /* preserve link state */ - s->BasicModeStatus |= qemu_get_queue(s->nic)->link_down ? 0 : 0x04; - - s->NWayAdvert = 0x05e1; /* all modes, full duplex */ - s->NWayLPAR = 0x05e1; /* all modes, full duplex */ - s->NWayExpansion = 0x0001; /* autonegotiation supported */ + rtl8139_reset_phy(s); /* also reset timer and disable timer interrupt */ s->TCTR = 0; @@ -1469,7 +1472,7 @@ static void rtl8139_BasicModeCtrl_write(RTL8139State *s, uint32_t val) DPRINTF("BasicModeCtrl register write(w) val=0x%04x\n", val); /* mask unwritable bits */ - uint32_t mask = 0x4cff; + uint32_t mask = 0xccff; if (1 || !rtl8139_config_writable(s)) { @@ -1479,6 +1482,11 @@ static void rtl8139_BasicModeCtrl_write(RTL8139State *s, uint32_t val) mask |= 0x0100; } + if (val & 0x8000) { + /* Reset PHY */ + rtl8139_reset_phy(s); + } + val = SET_MASKED(val, mask, s->BasicModeCtrl); s->BasicModeCtrl = val; -- cgit v1.1 From 7e354ed4df9787ff95cdb189374739e476be4f9a Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 5 Jan 2017 12:26:36 -0800 Subject: fsl_etsec: Fix Tx BD ring wrapping handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code that handles Tx buffer desciprtor ring scanning employs the following algorithm: 1. Restore current buffer descriptor pointer from TBPTRn 2. Process current descriptor 3. If current descriptor has BD_WRAP flag set set current descriptor pointer to start of the descriptor ring 4. If current descriptor points to start of the ring exit the loop, otherwise increment current descriptor pointer and go to #2 5. Store current descriptor in TBPTRn The way the code is implemented results in buffer descriptor ring being scanned starting at offset/descriptor #0. While covering 99% of the cases, this algorithm becomes problematic for a number of edge cases. Consider the following scenario: guest OS driver initializes descriptor ring to N individual descriptors and starts sending data out. Depending on the volume of traffic and probably guest OS driver implementation it is possible that an edge case where a packet, spread across 2 descriptors is placed in descriptors N - 1 and 0 in that order(it is easy to imagine similar examples involving more than 2 descriptors). What happens then is aforementioned algorithm starts at descriptor 0, sees a descriptor marked as BD_LAST, which it happily sends out as a separate packet(very much malformed at this point) then the iteration continues and the first part of the original packet is tacked to the next transmission which ends up being bogus as well. This behvaiour can be pretty reliably observed when scp'ing data from a guest OS via TAP interface for files larger than 160K (every time for 700K+). This patch changes the scanning algorithm to do the following: 1. Restore "current" buffer descriptor pointer from TBPTRn 2. If "current" descriptor does not have BD_TX_READY set, goto #6 3. Process current descriptor 4. If "current" descriptor has BD_WRAP flag set "current" descriptor pointer to start of the descriptor ring otherwise set increment "current" by the size of one descriptor 5. Goto #1 6. Save "current" buffer descriptor in TBPTRn This way we preserve the information about which descriptor was processed last and always start where we left off avoiding the original problem. On top of that, judging by the following excerpt from MPC8548ERM (p. 14-48): "... When the end of the TxBD ring is reached, eTSEC initializes TBPTRn to the value in the corresponding TBASEn. The TBPTR register is internally written by the eTSEC’s DMA controller during transmission. The pointer increments by eight (bytes) each time a descriptor is closed successfully by the eTSEC..." revised algorithm might also a more correct way of emulating this aspect of eTSEC peripheral. Cc: Alexander Graf Cc: Scott Wood Cc: Jason Wang Cc: qemu-devel@nongnu.org Signed-off-by: Andrey Smirnov Signed-off-by: Jason Wang --- hw/net/fsl_etsec/rings.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c index 54c0127..d0f93ee 100644 --- a/hw/net/fsl_etsec/rings.c +++ b/hw/net/fsl_etsec/rings.c @@ -358,25 +358,24 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr) /* Save flags before BD update */ bd_flags = bd.flags; - if (bd_flags & BD_TX_READY) { - process_tx_bd(etsec, &bd); - - /* Write back BD after update */ - write_buffer_descriptor(etsec, bd_addr, &bd); + if (!(bd_flags & BD_TX_READY)) { + break; } + process_tx_bd(etsec, &bd); + /* Write back BD after update */ + write_buffer_descriptor(etsec, bd_addr, &bd); + /* Wrap or next BD */ if (bd_flags & BD_WRAP) { bd_addr = ring_base; } else { bd_addr += sizeof(eTSEC_rxtx_bd); } + } while (TRUE); - } while (bd_addr != ring_base); - - bd_addr = ring_base; - - /* Save the Buffer Descriptor Pointers to current bd */ + /* Save the Buffer Descriptor Pointers to last bd that was not + * succesfully closed */ etsec->regs[TBPTR0 + ring_nbr].value = bd_addr; /* Set transmit halt THLTx */ -- cgit v1.1