From ec50dd4634ae06091e61f42b7ba975f9ed510ad0 Mon Sep 17 00:00:00 2001 From: Gonglei Date: Thu, 25 Jun 2015 14:24:10 +0800 Subject: rocker: fix memory leak Meanwhile, using g_new0 instead of g_malloc0, refer to commit 5839e53. Signed-off-by: Gonglei Message-id: 1435213450-6700-1-git-send-email-arei.gonglei@huawei.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 4d25842..7e06015 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type) RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { - RockerSwitch *rocker = g_malloc0(sizeof(*rocker)); + RockerSwitch *rocker; Rocker *r; r = rocker_find(name); @@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) return NULL; } + rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; -- cgit v1.1 From 5df6a1855b62dc653515d919e48c5b6f00c48f32 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 25 Jun 2015 10:18:05 +0100 Subject: e1000: flush packets when link comes up e1000_can_receive() checks the link up status register bit. If the bit is clear, packets will be queued and the peer may disable receive to avoid wasting CPU reading packets that cannot be delivered. The queue must be flushed once the link comes back up again. This patch fixes broken e1000 receive with Mac OS X Snow Leopard guests and tap networking. Flushing the queue invokes the async send callback, which re-enables tap fd read. Reported-by: Jonathan Liu Signed-off-by: Stefan Hajnoczi Reviewed-by: Fam Zheng Message-id: 1435223885-12745-1-git-send-email-stefanha@redhat.com --- hw/net/e1000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index bab8e2a..5c6bcd0 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -185,6 +185,9 @@ e1000_link_up(E1000State *s) { s->mac_reg[STATUS] |= E1000_STATUS_LU; s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static bool -- cgit v1.1 From b83b5f2ef9753713c2fb64ff4cae7cb1e080624e Mon Sep 17 00:00:00 2001 From: Brian Kress Date: Tue, 23 Jun 2015 11:49:25 -0400 Subject: vmxnet3: Fix incorrect small packet padding When running ESXi under qemu there is an issue with the ESXi guest discarding packets that are too short. The guest discards any packets under the normal minimum length for an ethernet packet (60). This results in odd behaviour where other hosts or VMs on other hosts can communicate with the ESXi guest just fine (since there's a physical NIC somewhere doing padding), but VMs on the host and the host itself cannot because the ARP request packets are too small for the ESXi host to accept. Someone in the past thought this was worth fixing, and added code to the vmxnet3 qemu emulation such that if it is receiving packets smaller than 60 bytes to pad the packet out to 60. Unfortunately this code is wrong (or at least in the wrong place). It does so BEFORE before taking into account the vnet_hdr at the front of the packet added by the tap device. As a result, it might add padding, but it never adds enough. Specifically it adds 10 less (the length of the vnet_hdr) than it needs to. The following (hopefully "obviously correct") patch simply swaps the order of processing the vnet header and the padding. With this patch an ESXi guest is able to communicate with the host or other local VMs. Signed-off-by: Brian Kress Reviewed-by: Paolo Bonzini Reviewed-by: Dmitry Fleytman Signed-off-by: Stefan Hajnoczi --- hw/net/vmxnet3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 104a0f5..706e060 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { memcpy(min_buf, buf, size); @@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); - } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); -- cgit v1.1 From 66851f640b73a5a84160ee6ab19ab429f68bbb9f Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 30 Jun 2015 19:25:53 -0700 Subject: rocker: don't queue receive pkts when port is disabled Commit 6e99c63 ("net/socket: Drop net_socket_can_send") changed the semantics around .can_receive for sockets to now require the device to flush queued pkts when transitioning to a .can_receive=true state. Rocker device was not flushing the queue on .can_receive=true transition, so the receiver was stuck. But, turns out we really don't want any queuing at all on the port when the port is disabled, otherwise when the port transitions to enabled, we'd receive and forward stale pkts that really should have been dropped. So, let's remove .can_receive so avoid queuing and drop the pkt in .receive if the port is disabled. Signed-off-by: Scott Feldman Reviewed-by: Fam Zheng Message-id: 1435717553-36187-1-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_fp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c index d8d934c..c693ae5 100644 --- a/hw/net/rocker/rocker_fp.c +++ b/hw/net/rocker/rocker_fp.c @@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) return ROCKER_OK; } -static int fp_port_can_receive(NetClientState *nc) -{ - FpPort *port = qemu_get_nic_opaque(nc); - - return port->enabled; -} - static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { FpPort *port = qemu_get_nic_opaque(nc); + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + return world_ingress(port->world, port->pport, iov, iovcnt); } @@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc) static NetClientInfo fp_port_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), - .can_receive = fp_port_can_receive, .receive = fp_port_receive, .receive_iov = fp_port_receive_iov, .cleanup = fp_port_cleanup, -- cgit v1.1 From d1a88c96b7f94c8e12c07518f55fce8873e814d0 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:08 -0700 Subject: rocker: fix misplaced break statement Premature break in switch case block. This particular case (group L2 rewrite) will be used for L2 LAG and L3 ECMP support, neither of which are enabled in the guest driver at this time, but are under development. Signed-off-by: Scott Feldman Reported-by: Paolo Bonzini Message-id: 1435746792-41278-2-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_of_dpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index b25a17d..02b3896 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -2525,7 +2525,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->has_set_vlan_id = true; ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); } - break; if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { ngroup->has_set_eth_src = true; ngroup->set_eth_src = @@ -2536,6 +2535,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->set_eth_dst = qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); } + break; case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: ngroup->has_vlan_id = true; -- cgit v1.1 From f211fcd75fec96ec9850885622ed028c6f7ebdf4 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:09 -0700 Subject: rocker: fix missing break statements Signed-off-by: Scott Feldman Reported-by: Paolo Bonzini Message-id: 1435746792-41278-3-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 7e06015..6e3d35a 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -193,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info) if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } + break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } + break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { -- cgit v1.1 From 96497af0afd60e57749316f1bc196b417055c585 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:10 -0700 Subject: rocker: return -1 when dropping packet on ingress Signed-off-by: Scott Feldman Message-id: 1435746792-41278-4-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_world.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c index b991e87..a6b18f1 100644 --- a/hw/net/rocker/rocker_world.c +++ b/hw/net/rocker/rocker_world.c @@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport, return world->ops->ig(world, pport, iov, iovcnt); } - return iov_size(iov, iovcnt); + return -1; } int world_do_cmd(World *world, DescInfo *info, -- cgit v1.1 From d0d2555852c5e684a97dce787d3c2a65b9a6d64c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:11 -0700 Subject: rocker: mark copy-to-cpu pkts as forwarding offloaded For pkts copied to the CPU (to be processed by guest driver), mark the Rx descriptor with flag "OFFLOAD_FWD" to indicate device has already forwarded pkt. The guest driver will use this indicator to avoid duplicate forwarding in the guest OS. Examples include bcast/mcast/unknown ucast pkts flooded to bridged ports. We want to avoid both the device and the guest bridge driver flooding these pkts, which would result in duplicates pkts on the wire. Packet sampling, such as sFlow, can also use this technique to mark pkts for the guest OS to record but otherwise drop. Signed-off-by: Scott Feldman Message-id: 1435746792-41278-5-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- docs/specs/rocker.txt | 4 ++++ hw/net/rocker/rocker.c | 6 +++++- hw/net/rocker/rocker.h | 2 +- hw/net/rocker/rocker_hw.h | 1 + hw/net/rocker/rocker_of_dpa.c | 5 ++++- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt index 0af5c61..1c74351 100644 --- a/docs/specs/rocker.txt +++ b/docs/specs/rocker.txt @@ -637,6 +637,7 @@ The TLVs for Rx descriptor buffer are: (1 << 5): TCP packet (1 << 6): UDP packet (1 << 7): TCP/UDP csum good + (1 << 8): Offload forward RX_CSUM 2 IP calculated checksum: IPv4: IP payload csum IPv6: header and payload csum @@ -645,6 +646,9 @@ The TLVs for Rx descriptor buffer are: RX_FRAG_MAX_LEN 2 Packet maximum fragment length RX_FRAG_LEN 2 Actual packet fragment length after receive +Offload forward RX_FLAG indicates the device has already forwarded the packet +so the host CPU should not also forward the packet. + Possible status return codes in descriptor on completion are: DESC_COMP_ERR reason diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 6e3d35a..47d080f 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -603,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, } int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt) + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; @@ -646,6 +646,10 @@ int rx_produce(World *world, uint32_t pport, goto out; } + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index b3310b6..f9c80f8 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id); int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt); + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt); diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index fe639ba..8c50830 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -250,6 +250,7 @@ enum { #define ROCKER_RX_FLAGS_TCP (1 << 5) #define ROCKER_RX_FLAGS_UDP (1 << 6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) /* Tx msg */ enum { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 02b3896..874fb01 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id) static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, OfDpaGroup *group) { + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + if (group->l2_interface.pop_vlan) { of_dpa_flow_pkt_strip_vlan(fc); } @@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, */ if (group->l2_interface.out_pport == 0) { - rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); } else if (group->l2_interface.out_pport != fc->in_pport) { rocker_port_eg(world_rocker(fc->of_dpa->world), group->l2_interface.out_pport, -- cgit v1.1 From 849729bb796e0ecbb3f370f119682f2821dd1441 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:12 -0700 Subject: rocker: tests: don't need to specify master/self when setting vlans 4.1 Linux kernel doesn't require specifying "master" or "self" when setting vlans on a port, so clean these up from the tests that use vlans. Signed-off-by: Scott Feldman Message-id: 1435746792-41278-6-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- tests/rocker/bridge-vlan | 4 ++-- tests/rocker/bridge-vlan-stp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan index ef9e5f5..897d82c 100755 --- a/tests/rocker/bridge-vlan +++ b/tests/rocker/bridge-vlan @@ -20,8 +20,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp index c660312..85d2646 100755 --- a/tests/rocker/bridge-vlan-stp +++ b/tests/rocker/bridge-vlan-stp @@ -21,8 +21,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW -- cgit v1.1