From aa4f082f7526d39dac8e2ca64d192d858014ee10 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sat, 3 Aug 2013 22:20:41 -0400 Subject: tap: Use numbered tap/tun devices on all *BSD OS's The following patch simplifies the *BSD tap/tun code and makes use of numbered tap/tun interfaces on all *BSD OS's. NetBSD has a patch in their pkgsrc tree to make use of this feature and DragonFly also supports this as well. Signed-off-by: Brad Smith Signed-off-by: Stefan Hajnoczi --- net/tap-bsd.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/net/tap-bsd.c b/net/tap-bsd.c index f61d580..90f8a02 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -44,8 +44,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, struct stat s; #endif -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ - defined(__OpenBSD__) || defined(__APPLE__) /* if no ifname is given, always start the search from tap0/tun0. */ int i; char dname[100]; @@ -76,15 +74,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, dname, strerror(errno)); return -1; } -#else - TFR(fd = open("/dev/tap", O_RDWR)); - if (fd < 0) { - fprintf(stderr, - "warning: could not open /dev/tap: no virtual network emulation: %s\n", - strerror(errno)); - return -1; - } -#endif #ifdef TAPGIFNAME if (ioctl(fd, TAPGIFNAME, (void *)&ifr) < 0) { -- cgit v1.1 From 067404be626d03656788adb7deff8072ca84299f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 2 Aug 2013 21:47:08 +0200 Subject: net: Rename send_queue to incoming_queue Each networking client has a queue for packets that could not yet be delivered to that client. Calling this queue "send_queue" is highly confusing as it has nothing to to with packets send from this client but to it. Avoid this confusing by renaming it to "incoming_queue". Signed-off-by: Jan Kiszka Signed-off-by: Stefan Hajnoczi --- include/net/net.h | 2 +- net/hub.c | 2 +- net/net.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/net.h b/include/net/net.h index 30e4b04..11e1468 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -69,7 +69,7 @@ struct NetClientState { int link_down; QTAILQ_ENTRY(NetClientState) next; NetClientState *peer; - NetQueue *send_queue; + NetQueue *incoming_queue; char *model; char *name; char info_str[256]; diff --git a/net/hub.c b/net/hub.c index df32074..33a99c9 100644 --- a/net/hub.c +++ b/net/hub.c @@ -347,7 +347,7 @@ bool net_hub_flush(NetClientState *nc) QLIST_FOREACH(port, &source_port->hub->ports, next) { if (port != source_port) { - ret += qemu_net_queue_flush(port->nc.send_queue); + ret += qemu_net_queue_flush(port->nc.incoming_queue); } } return ret ? true : false; diff --git a/net/net.c b/net/net.c index 1148592..c330c9a 100644 --- a/net/net.c +++ b/net/net.c @@ -207,7 +207,7 @@ static void qemu_net_client_setup(NetClientState *nc, } QTAILQ_INSERT_TAIL(&net_clients, nc, next); - nc->send_queue = qemu_new_net_queue(nc); + nc->incoming_queue = qemu_new_net_queue(nc); nc->destructor = destructor; } @@ -289,8 +289,8 @@ static void qemu_cleanup_net_client(NetClientState *nc) static void qemu_free_net_client(NetClientState *nc) { - if (nc->send_queue) { - qemu_del_net_queue(nc->send_queue); + if (nc->incoming_queue) { + qemu_del_net_queue(nc->incoming_queue); } if (nc->peer) { nc->peer->peer = NULL; @@ -431,7 +431,7 @@ void qemu_purge_queued_packets(NetClientState *nc) return; } - qemu_net_queue_purge(nc->peer->send_queue, nc); + qemu_net_queue_purge(nc->peer->incoming_queue, nc); } void qemu_flush_queued_packets(NetClientState *nc) @@ -444,7 +444,7 @@ void qemu_flush_queued_packets(NetClientState *nc) } return; } - if (qemu_net_queue_flush(nc->send_queue)) { + if (qemu_net_queue_flush(nc->incoming_queue)) { /* We emptied the queue successfully, signal to the IO thread to repoll * the file descriptor (for tap, for example). */ @@ -468,7 +468,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, return size; } - queue = sender->peer->send_queue; + queue = sender->peer->incoming_queue; return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb); } @@ -543,7 +543,7 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender, return iov_size(iov, iovcnt); } - queue = sender->peer->send_queue; + queue = sender->peer->incoming_queue; return qemu_net_queue_send_iov(queue, sender, QEMU_NET_PACKET_FLAG_NONE, -- cgit v1.1 From e9845f0985f088dd01790f4821026df0afba5795 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Fri, 2 Aug 2013 18:30:52 +0200 Subject: e1000: add interrupt mitigation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch partially implements the e1000 interrupt mitigation mechanisms. Using a single QEMUTimer, it emulates the ITR register (which is the newer mitigation register, recommended by Intel) and approximately emulates RADV and TADV registers. TIDV and RDTR register functionalities are not emulated (RDTR is only used to validate RADV, according to the e1000 specs). RADV, TADV, TIDV and RDTR registers make up the older e1000 mitigation mechanism and would need a timer each to be completely emulated. However, a single timer has been used in order to reach a good compromise between emulation accuracy and simplicity/efficiency. The implemented mechanism can be enabled/disabled specifying the command line e1000-specific boolean parameter "mitigation", e.g. qemu-system-x86_64 -device e1000,mitigation=on,... ... For more information, see the Software developer's manual at http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf. Interrupt mitigation boosts performance when the guest suffers from an high interrupt rate (i.e. receiving short UDP packets at high packet rate). For some numerical results see the following link http://info.iet.unipi.it/~luigi/papers/20130520-rizzo-vm.pdf Signed-off-by: Vincenzo Maffione Reviewed-by: Andreas Färber (for pc-* machines) Signed-off-by: Stefan Hajnoczi --- hw/i386/pc_piix.c | 16 ++++++- hw/i386/pc_q35.c | 15 +++++- hw/net/e1000.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++-- include/hw/i386/pc.h | 8 ++++ 4 files changed, 164 insertions(+), 6 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 275e395..147d08c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -339,14 +339,25 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args) .desc = "Standard PC (i440FX + PIIX, 1996)", \ .hot_add_cpu = pc_hot_add_cpu +#define PC_I440FX_1_7_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS +static QEMUMachine pc_i440fx_machine_v1_7 = { + PC_I440FX_1_7_MACHINE_OPTIONS, + .name = "pc-i440fx-1.7", + .alias = "pc", + .init = pc_init_pci, + .is_default = 1, +}; + #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS static QEMUMachine pc_i440fx_machine_v1_6 = { PC_I440FX_1_6_MACHINE_OPTIONS, .name = "pc-i440fx-1.6", - .alias = "pc", .init = pc_init_pci_1_6, - .is_default = 1, + .compat_props = (GlobalProperty[]) { + PC_COMPAT_1_6, + { /* end of list */ } + }, }; static QEMUMachine pc_i440fx_machine_v1_5 = { @@ -735,6 +746,7 @@ static QEMUMachine xenfv_machine = { static void pc_machine_init(void) { + qemu_register_machine(&pc_i440fx_machine_v1_7); qemu_register_machine(&pc_i440fx_machine_v1_6); qemu_register_machine(&pc_i440fx_machine_v1_5); qemu_register_machine(&pc_i440fx_machine_v1_4); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index d7b7c3b..b4b5155 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -258,13 +258,25 @@ static void pc_q35_init_1_4(QEMUMachineInitArgs *args) .desc = "Standard PC (Q35 + ICH9, 2009)", \ .hot_add_cpu = pc_hot_add_cpu +#define PC_Q35_1_7_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS + +static QEMUMachine pc_q35_machine_v1_7 = { + PC_Q35_1_7_MACHINE_OPTIONS, + .name = "pc-q35-1.7", + .alias = "q35", + .init = pc_q35_init, +}; + #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS static QEMUMachine pc_q35_machine_v1_6 = { PC_Q35_1_6_MACHINE_OPTIONS, .name = "pc-q35-1.6", - .alias = "q35", .init = pc_q35_init_1_6, + .compat_props = (GlobalProperty[]) { + PC_COMPAT_1_6, + { /* end of list */ } + }, }; static QEMUMachine pc_q35_machine_v1_5 = { @@ -293,6 +305,7 @@ static QEMUMachine pc_q35_machine_v1_4 = { static void pc_q35_machine_init(void) { + qemu_register_machine(&pc_q35_machine_v1_7); qemu_register_machine(&pc_q35_machine_v1_6); qemu_register_machine(&pc_q35_machine_v1_5); qemu_register_machine(&pc_q35_machine_v1_4); diff --git a/hw/net/e1000.c b/hw/net/e1000.c index f5ebed4..d3f274c 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -135,9 +135,16 @@ typedef struct E1000State_st { QEMUTimer *autoneg_timer; + QEMUTimer *mit_timer; /* Mitigation timer. */ + bool mit_timer_on; /* Mitigation timer is running. */ + bool mit_irq_level; /* Tracks interrupt pin level. */ + uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ + /* Compatibility flags for migration to/from qemu 1.3.0 and older */ #define E1000_FLAG_AUTONEG_BIT 0 +#define E1000_FLAG_MIT_BIT 1 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) +#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) uint32_t compat_flags; } E1000State; @@ -158,7 +165,8 @@ enum { defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), - defreg(VET), + defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), + defreg(ITR), }; static void @@ -245,10 +253,21 @@ static const uint32_t mac_reg_init[] = { E1000_MANC_RMCP_EN, }; +/* Helper function, *curr == 0 means the value is not set */ +static inline void +mit_update_delay(uint32_t *curr, uint32_t value) +{ + if (value && (*curr == 0 || value < *curr)) { + *curr = value; + } +} + static void set_interrupt_cause(E1000State *s, int index, uint32_t val) { PCIDevice *d = PCI_DEVICE(s); + uint32_t pending_ints; + uint32_t mit_delay; if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) { /* Only for 8257x */ @@ -266,7 +285,57 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val) */ s->mac_reg[ICS] = val; - qemu_set_irq(d->irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0); + pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]); + if (!s->mit_irq_level && pending_ints) { + /* + * Here we detect a potential raising edge. We postpone raising the + * interrupt line if we are inside the mitigation delay window + * (s->mit_timer_on == 1). + * We provide a partial implementation of interrupt mitigation, + * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for + * RADV and TADV, 256ns units for ITR). RDTR is only used to enable + * RADV; relative timers based on TIDV and RDTR are not implemented. + */ + if (s->mit_timer_on) { + return; + } + if (s->compat_flags & E1000_FLAG_MIT) { + /* Compute the next mitigation delay according to pending + * interrupts and the current values of RADV (provided + * RDTR!=0), TADV and ITR. + * Then rearm the timer. + */ + mit_delay = 0; + if (s->mit_ide && + (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) { + mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); + } + if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) { + mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); + } + mit_update_delay(&mit_delay, s->mac_reg[ITR]); + + if (mit_delay) { + s->mit_timer_on = 1; + timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + mit_delay * 256); + } + s->mit_ide = 0; + } + } + + s->mit_irq_level = (pending_ints != 0); + qemu_set_irq(d->irq[0], s->mit_irq_level); +} + +static void +e1000_mit_timer(void *opaque) +{ + E1000State *s = opaque; + + s->mit_timer_on = 0; + /* Call set_interrupt_cause to update the irq level (if necessary). */ + set_interrupt_cause(s, 0, s->mac_reg[ICR]); } static void @@ -307,6 +376,10 @@ static void e1000_reset(void *opaque) int i; timer_del(d->autoneg_timer); + timer_del(d->mit_timer); + d->mit_timer_on = 0; + d->mit_irq_level = 0; + d->mit_ide = 0; memset(d->phy_reg, 0, sizeof d->phy_reg); memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init); memset(d->mac_reg, 0, sizeof d->mac_reg); @@ -572,6 +645,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; struct e1000_tx *tp = &s->tx; + s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor op = le32_to_cpu(xp->cmd_and_length); tp->ipcss = xp->lower_setup.ip_fields.ipcss; @@ -1047,7 +1121,8 @@ static uint32_t (*macreg_readops[])(E1000State *, int) = { getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL), getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), - getreg(TDLEN), getreg(RDLEN), + getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV), + getreg(TADV), getreg(ITR), [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, @@ -1069,6 +1144,8 @@ static void (*macreg_writeops[])(E1000State *, int, uint32_t) = { [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr, [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl, + [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, + [ITR] = set_16bit, [RA ... RA+31] = &mac_writereg, [MTA ... MTA+127] = &mac_writereg, [VFTA ... VFTA+127] = &mac_writereg, @@ -1150,6 +1227,11 @@ static void e1000_pre_save(void *opaque) E1000State *s = opaque; NetClientState *nc = qemu_get_queue(s->nic); + /* If the mitigation timer is active, emulate a timeout now. */ + if (s->mit_timer_on) { + e1000_mit_timer(s); + } + if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { return; } @@ -1171,6 +1253,14 @@ static int e1000_post_load(void *opaque, int version_id) E1000State *s = opaque; NetClientState *nc = qemu_get_queue(s->nic); + if (!(s->compat_flags & E1000_FLAG_MIT)) { + s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] = + s->mac_reg[TADV] = 0; + s->mit_irq_level = false; + } + s->mit_ide = 0; + s->mit_timer_on = false; + /* nc.link_down can't be migrated, so infer link_down according * to link status bit in mac_reg[STATUS]. * Alternatively, restart link negotiation if it was in progress. */ @@ -1190,6 +1280,28 @@ static int e1000_post_load(void *opaque, int version_id) return 0; } +static bool e1000_mit_state_needed(void *opaque) +{ + E1000State *s = opaque; + + return s->compat_flags & E1000_FLAG_MIT; +} + +static const VMStateDescription vmstate_e1000_mit_state = { + .name = "e1000/mit_state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(mac_reg[RDTR], E1000State), + VMSTATE_UINT32(mac_reg[RADV], E1000State), + VMSTATE_UINT32(mac_reg[TADV], E1000State), + VMSTATE_UINT32(mac_reg[ITR], E1000State), + VMSTATE_BOOL(mit_irq_level, E1000State), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_e1000 = { .name = "e1000", .version_id = 2, @@ -1267,6 +1379,14 @@ static const VMStateDescription vmstate_e1000 = { VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128), VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_e1000_mit_state, + .needed = e1000_mit_state_needed, + }, { + /* empty */ + } } }; @@ -1316,6 +1436,8 @@ pci_e1000_uninit(PCIDevice *dev) timer_del(d->autoneg_timer); timer_free(d->autoneg_timer); + timer_del(d->mit_timer); + timer_free(d->mit_timer); memory_region_destroy(&d->mmio); memory_region_destroy(&d->io); qemu_del_nic(d->nic); @@ -1371,6 +1493,7 @@ static int pci_e1000_init(PCIDevice *pci_dev) add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0"); d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d); + d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); return 0; } @@ -1385,6 +1508,8 @@ static Property e1000_properties[] = { DEFINE_NIC_PROPERTIES(E1000State, conf), DEFINE_PROP_BIT("autonegotiation", E1000State, compat_flags, E1000_FLAG_AUTONEG_BIT, true), + DEFINE_PROP_BIT("mitigation", E1000State, + compat_flags, E1000_FLAG_MIT_BIT, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 7fb04d8..9b2ddc4 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -225,7 +225,15 @@ void pvpanic_init(ISABus *bus); int e820_add_entry(uint64_t, uint64_t, uint32_t); +#define PC_COMPAT_1_6 \ + {\ + .driver = "e1000",\ + .property = "mitigation",\ + .value = "off",\ + } + #define PC_COMPAT_1_5 \ + PC_COMPAT_1_6, \ {\ .driver = "Conroe-" TYPE_X86_CPU,\ .property = "model",\ -- cgit v1.1 From 3dbb9786e9f5fa8429824818b6f799d8f65d1199 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Fri, 23 Aug 2013 12:28:25 -0400 Subject: vmxnet3: Eliminate __packed redefined warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This eliminates a warning about __packed being redefined as exposed by the vmxnet3 code. __packed is not used anywhere in the vmxnet3 code. CC hw/net/vmxnet3.o In file included from hw/net/vmxnet3.c:29: hw/net/vmxnet3.h:37:1: warning: "__packed" redefined In file included from /usr/include/stdlib.h:38, from /buildbot-qemu/default_openbsd_current/build/include/qemu-common.h:26, from /buildbot-qemu/default_openbsd_current/build/include/hw/hw.h:5, from hw/net/vmxnet3.c:18: /usr/include/sys/cdefs.h:209:1: warning: this is the location of the previous definition Signed-off-by: Brad Smith Reviewed-by: Andreas Färber Signed-off-by: Stefan Hajnoczi --- hw/net/vmxnet3.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h index 4eae7c7..f987d71 100644 --- a/hw/net/vmxnet3.h +++ b/hw/net/vmxnet3.h @@ -34,7 +34,6 @@ #define __le16 uint16_t #define __le32 uint32_t #define __le64 uint64_t -#define __packed QEMU_PACKED #if defined(HOST_WORDS_BIGENDIAN) #define __BIG_ENDIAN_BITFIELD @@ -749,7 +748,6 @@ struct Vmxnet3_DriverShared { #undef __le16 #undef __le32 #undef __le64 -#undef __packed #if defined(HOST_WORDS_BIGENDIAN) #undef __BIG_ENDIAN_BITFIELD #endif -- cgit v1.1 From 45d883dcf208160e2db308d1b368beb74f37dc7e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 2 Sep 2013 13:10:34 +0200 Subject: ne2000: mark I/O as LITTLE_ENDIAN Now that the memory subsystem is propagating the endianness correctly, the ne2000 device should have its I/O ports marked as LITTLE_ENDIAN, as PCI devices are little endian. This makes the ne2000 NIC to work again on PowerPC. Cc: qemu-stable@nongnu.org Cc: Stefan Hajnoczi Signed-off-by: Aurelien Jarno Signed-off-by: Stefan Hajnoczi --- hw/net/ne2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c index 31afd28..c961258 100644 --- a/hw/net/ne2000.c +++ b/hw/net/ne2000.c @@ -693,7 +693,7 @@ static void ne2000_write(void *opaque, hwaddr addr, static const MemoryRegionOps ne2000_ops = { .read = ne2000_read, .write = ne2000_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, }; /***********************************************************/ -- cgit v1.1