diff options
Diffstat (limited to 'migration/rdma.c')
-rw-r--r-- | migration/rdma.c | 137 |
1 files changed, 100 insertions, 37 deletions
diff --git a/migration/rdma.c b/migration/rdma.c index c6bc607..ca56594 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -165,20 +165,6 @@ enum { RDMA_CONTROL_UNREGISTER_FINISHED, /* unpinning finished */ }; -static const char *control_desc[] = { - [RDMA_CONTROL_NONE] = "NONE", - [RDMA_CONTROL_ERROR] = "ERROR", - [RDMA_CONTROL_READY] = "READY", - [RDMA_CONTROL_QEMU_FILE] = "QEMU FILE", - [RDMA_CONTROL_RAM_BLOCKS_REQUEST] = "RAM BLOCKS REQUEST", - [RDMA_CONTROL_RAM_BLOCKS_RESULT] = "RAM BLOCKS RESULT", - [RDMA_CONTROL_COMPRESS] = "COMPRESS", - [RDMA_CONTROL_REGISTER_REQUEST] = "REGISTER REQUEST", - [RDMA_CONTROL_REGISTER_RESULT] = "REGISTER RESULT", - [RDMA_CONTROL_REGISTER_FINISHED] = "REGISTER FINISHED", - [RDMA_CONTROL_UNREGISTER_REQUEST] = "UNREGISTER REQUEST", - [RDMA_CONTROL_UNREGISTER_FINISHED] = "UNREGISTER FINISHED", -}; /* * Memory and MR structures used to represent an IB Send/Recv work request. @@ -251,6 +237,30 @@ typedef struct QEMU_PACKED RDMADestBlock { uint32_t padding; } RDMADestBlock; +static const char *control_desc(unsigned int rdma_control) +{ + static const char *strs[] = { + [RDMA_CONTROL_NONE] = "NONE", + [RDMA_CONTROL_ERROR] = "ERROR", + [RDMA_CONTROL_READY] = "READY", + [RDMA_CONTROL_QEMU_FILE] = "QEMU FILE", + [RDMA_CONTROL_RAM_BLOCKS_REQUEST] = "RAM BLOCKS REQUEST", + [RDMA_CONTROL_RAM_BLOCKS_RESULT] = "RAM BLOCKS RESULT", + [RDMA_CONTROL_COMPRESS] = "COMPRESS", + [RDMA_CONTROL_REGISTER_REQUEST] = "REGISTER REQUEST", + [RDMA_CONTROL_REGISTER_RESULT] = "REGISTER RESULT", + [RDMA_CONTROL_REGISTER_FINISHED] = "REGISTER FINISHED", + [RDMA_CONTROL_UNREGISTER_REQUEST] = "UNREGISTER REQUEST", + [RDMA_CONTROL_UNREGISTER_FINISHED] = "UNREGISTER FINISHED", + }; + + if (rdma_control > RDMA_CONTROL_UNREGISTER_FINISHED) { + return "??BAD CONTROL VALUE??"; + } + + return strs[rdma_control]; +} + static uint64_t htonll(uint64_t v) { union { uint32_t lv[2]; uint64_t llv; } u; @@ -1466,6 +1476,56 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, return 0; } +/* Wait for activity on the completion channel. + * Returns 0 on success, none-0 on error. + */ +static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) +{ + /* + * Coroutine doesn't start until migration_fd_process_incoming() + * so don't yield unless we know we're running inside of a coroutine. + */ + if (rdma->migration_started_on_destination) { + yield_until_fd_readable(rdma->comp_channel->fd); + } else { + /* This is the source side, we're in a separate thread + * or destination prior to migration_fd_process_incoming() + * we can't yield; so we have to poll the fd. + * But we need to be able to handle 'cancel' or an error + * without hanging forever. + */ + while (!rdma->error_state && !rdma->received_error) { + GPollFD pfds[1]; + pfds[0].fd = rdma->comp_channel->fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + /* 0.1s timeout, should be fine for a 'cancel' */ + switch (qemu_poll_ns(pfds, 1, 100 * 1000 * 1000)) { + case 1: /* fd active */ + return 0; + + case 0: /* Timeout, go around again */ + break; + + default: /* Error of some type - + * I don't trust errno from qemu_poll_ns + */ + error_report("%s: poll failed", __func__); + return -EPIPE; + } + + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { + /* Bail out and let the cancellation happen */ + return -EPIPE; + } + } + } + + if (rdma->received_error) { + return -EPIPE; + } + return rdma->error_state; +} + /* * Block until the next work request has completed. * @@ -1513,22 +1573,21 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, } while (1) { - /* - * Coroutine doesn't start until migration_fd_process_incoming() - * so don't yield unless we know we're running inside of a coroutine. - */ - if (rdma->migration_started_on_destination) { - yield_until_fd_readable(rdma->comp_channel->fd); + ret = qemu_rdma_wait_comp_channel(rdma); + if (ret) { + goto err_block_for_wrid; } - if (ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx)) { + ret = ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx); + if (ret) { perror("ibv_get_cq_event"); goto err_block_for_wrid; } num_cq_events++; - if (ibv_req_notify_cq(cq, 0)) { + ret = -ibv_req_notify_cq(cq, 0); + if (ret) { goto err_block_for_wrid; } @@ -1564,6 +1623,8 @@ err_block_for_wrid: if (num_cq_events) { ibv_ack_cq_events(cq, num_cq_events); } + + rdma->error_state = ret; return ret; } @@ -1590,7 +1651,7 @@ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, .num_sge = 1, }; - trace_qemu_rdma_post_send_control(control_desc[head->type]); + trace_qemu_rdma_post_send_control(control_desc(head->type)); /* * We don't actually need to do a memcpy() in here if we used @@ -1669,16 +1730,16 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, network_to_control((void *) rdma->wr_data[idx].control); memcpy(head, rdma->wr_data[idx].control, sizeof(RDMAControlHeader)); - trace_qemu_rdma_exchange_get_response_start(control_desc[expecting]); + trace_qemu_rdma_exchange_get_response_start(control_desc(expecting)); if (expecting == RDMA_CONTROL_NONE) { - trace_qemu_rdma_exchange_get_response_none(control_desc[head->type], + trace_qemu_rdma_exchange_get_response_none(control_desc(head->type), head->type); } else if (head->type != expecting || head->type == RDMA_CONTROL_ERROR) { error_report("Was expecting a %s (%d) control message" ", but got: %s (%d), length: %d", - control_desc[expecting], expecting, - control_desc[head->type], head->type, head->len); + control_desc(expecting), expecting, + control_desc(head->type), head->type, head->len); if (head->type == RDMA_CONTROL_ERROR) { rdma->received_error = true; } @@ -1788,7 +1849,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, } } - trace_qemu_rdma_exchange_send_waiting(control_desc[resp->type]); + trace_qemu_rdma_exchange_send_waiting(control_desc(resp->type)); ret = qemu_rdma_exchange_get_response(rdma, resp, resp->type, RDMA_WRID_DATA); @@ -1800,7 +1861,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, if (resp_idx) { *resp_idx = RDMA_WRID_DATA; } - trace_qemu_rdma_exchange_send_received(control_desc[resp->type]); + trace_qemu_rdma_exchange_send_received(control_desc(resp->type)); } rdma->control_ready_expected = 1; @@ -2208,7 +2269,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) int ret, idx; if (rdma->cm_id && rdma->connected) { - if (rdma->error_state && !rdma->received_error) { + if ((rdma->error_state || + migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) && + !rdma->received_error) { RDMAControlHeader head = { .len = 0, .type = RDMA_CONTROL_ERROR, .repeat = 1, @@ -2365,6 +2428,12 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) caps_to_network(&cap); + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); + if (ret) { + ERROR(errp, "posting second control recv"); + goto err_rdma_source_connect; + } + ret = rdma_connect(rdma->cm_id, &conn_param); if (ret) { perror("rdma_connect"); @@ -2405,12 +2474,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) rdma_ack_cm_event(cm_event); - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - ERROR(errp, "posting second control recv!"); - goto err_rdma_source_connect; - } - rdma->control_ready_expected = 1; rdma->nb_sent = 0; return 0; @@ -3350,7 +3413,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) ret = -EIO; goto out; default: - error_report("Unknown control message %s", control_desc[head.type]); + error_report("Unknown control message %s", control_desc(head.type)); ret = -EIO; goto out; } |