Merge tag 'migration-20231002-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration Pull request (20231002) In this migration pull request: - Refactor repeated call of yank_unregister_instance (tejus) - More migraton-test changes Please, apply. # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmUatX4ACgkQ9IfvGFhy # 1yMlbQ/+Kp7m1Mr5LUM/8mvh9LZTVvWauBHch1pdvpCsJO+Grdtv6MtZL5UKT2ue # xYksZvf/rT4bdt2H1lSsG1o2GOcIf4qyWICgYNDo8peaxm1IrvgAbimaWHWLeORX # sBxKcBBuTac55vmEKzbPSbwGCGGTU/11UGXQ4ruGN3Hwbd2JZHAK6GxGIzANToZc # JtwBr/31SxJ2YndNLaPMEnD3cHbRbD2UyODeTt1KI5LdTGgXHoB6PgCk2AMQP1Ko # LlaPLsrEKC06h2CJ27BB36CNVEGMN2iFa3aKz1FC85Oj2ckatspAFw78t9guj6eM # MYxn0ipSsjjWjMsc3zEDxi7JrA///5bp1e6e7WdLpOaMBPpV4xuvVvA6Aku2es7D # fMPOMdftBp6rrXp8edBMTs1sOHdE1k8ZsyJ90m96ckjfLX39TPAiJRm4pWD2UuP5 # Wjr+/IU+LEp/KCqimMj0kYMRz4rM3PP8hOakPZLiRR5ZG6sgbHZK44iPXB/Udz/g # TCZ87siIpI8YHb3WCaO5CvbdjPrszg1j9v7RimtDeGLDR/hNokkQ1EEeszDTGpgt # xst4S4wVmex2jYyi53woH4V1p8anP7iqa8elPehAaYPobp47pmBV53ZaSwibqzPN # TmO7P9rfyQGCiXXZRvrAQJa+gmAkQlSEI7mSssV77pU+1gdEj9c= # =hD/8 # -----END PGP SIGNATURE----- # gpg: Signature made Mon 02 Oct 2023 08:20:14 EDT # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * tag 'migration-20231002-pull-request' of https://gitlab.com/juan.quintela/qemu: migration/rdma: Simplify the function that saves a page migration: Remove unused qemu_file_credit_transfer() migration/rdma: Don't use imaginary transfers migration/rdma: Remove QEMUFile parameter when not used migration/RDMA: It is accounting for zero/normal pages in two places migration: Don't abuse qemu_file transferred for RDMA migration: Use qemu_file_transferred_noflush() for block migration. migration: Refactor repeated call of yank_unregister_instance migration-test: simplify shmem_opts handling migration-test: dirtylimit checks for x86_64 arch before migration-test: Add bootfile_create/delete() functions migration-test: bootpath is the same for all tests and for all archs migration-test: Create kvm_opts Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
author: Stefan Hajnoczi <stefanha@redhat.com> 2023-10-02 14:42:44 -0400
committer: Stefan Hajnoczi <stefanha@redhat.com> 2023-10-02 14:42:44 -0400
commit: 50d0bfd0ed78209f003e8f7b9ac25edaa0399157 (patch)
tree: 2d2f161f2f50cedf324dd95049686238aa9f28bb /migration
parent: 5d7e601df37d8bdd490472fd4cfe3e4ca258df09 (diff)
parent: 9c53d369e5903375a2e3358f739be77dcb8dae49 (diff)
download: qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.zip
qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.tar.gz
qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.tar.bz2
9 files changed, 60 insertions, 85 deletions
diff --git a/migration/block.c b/migration/block.c
index eb6aafe..5f93087 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -755,7 +755,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
 static int block_save_iterate(QEMUFile *f, void *opaque)
 {
     int ret;
-    uint64_t last_bytes = qemu_file_transferred(f);
+    uint64_t last_bytes = qemu_file_transferred_noflush(f);
 
     trace_migration_block_save("iterate", block_mig_state.submitted,
                                block_mig_state.transferred);
@@ -807,7 +807,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
     }
 
     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
-    uint64_t delta_bytes = qemu_file_transferred(f) - last_bytes;
+    uint64_t delta_bytes = qemu_file_transferred_noflush(f) - last_bytes;
     return (delta_bytes > 0);
 }
 
diff --git a/migration/migration-stats.c b/migration/migration-stats.c
index 095d6d7..84e11e6 100644
--- a/migration/migration-stats.c
+++ b/migration/migration-stats.c
@@ -61,8 +61,9 @@ void migration_rate_reset(QEMUFile *f)
 uint64_t migration_transferred_bytes(QEMUFile *f)
 {
     uint64_t multifd = stat64_get(&mig_stats.multifd_bytes);
+    uint64_t rdma = stat64_get(&mig_stats.rdma_bytes);
     uint64_t qemu_file = qemu_file_transferred(f);
 
-    trace_migration_transferred_bytes(qemu_file, multifd);
-    return qemu_file + multifd;
+    trace_migration_transferred_bytes(qemu_file, multifd, rdma);
+    return qemu_file + multifd + rdma;
 }
diff --git a/migration/migration-stats.h b/migration/migration-stats.h
index ac2260e..2358caa 100644
--- a/migration/migration-stats.h
+++ b/migration/migration-stats.h
@@ -90,6 +90,10 @@ typedef struct {
      */
     Stat64 rate_limit_max;
     /*
+     * Number of bytes sent through RDMA.
+     */
+    Stat64 rdma_bytes;
+    /*
      * Total number of bytes transferred.
      */
     Stat64 transferred;
diff --git a/migration/migration.c b/migration/migration.c
index e2ed85b..6d3cf5d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1703,15 +1703,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     } else if (strstart(uri, "fd:", &p)) {
         fd_start_outgoing_migration(s, p, &local_err);
     } else {
-        if (!resume_requested) {
-            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
-        }
         error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
                    "a valid migration protocol");
         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                           MIGRATION_STATUS_FAILED);
         block_cleanup_parameters();
-        return;
     }
 
     if (local_err) {
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 19c33c9..5e8207d 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -322,23 +322,20 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
     }
 }
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
-                             uint64_t *bytes_sent)
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size)
 {
     if (f->hooks && f->hooks->save_page) {
-        int ret = f->hooks->save_page(f, block_offset,
-                                      offset, size, bytes_sent);
-
+        int ret = f->hooks->save_page(f, block_offset, offset, size);
+        /*
+         * RAM_SAVE_CONTROL_* are negative values
+         */
         if (ret != RAM_SAVE_CONTROL_DELAYED &&
             ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-            if (bytes_sent && *bytes_sent > 0) {
-                qemu_file_credit_transfer(f, *bytes_sent);
-            } else if (ret < 0) {
+            if (ret < 0) {
                 qemu_file_set_error(f, ret);
             }
         }
-
         return ret;
     }
 
@@ -400,11 +397,6 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     return len;
 }
 
-void qemu_file_credit_transfer(QEMUFile *f, size_t size)
-{
-    f->total_transferred += size;
-}
-
 /** Closes the file
  *
  * Returns negative error value if any error happened on previous operations or
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 47015f5..03e718c 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -49,11 +49,10 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data);
  * This function allows override of where the RAM page
  * is saved (such as RDMA, for example.)
  */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
-                                 ram_addr_t block_offset,
-                                 ram_addr_t offset,
-                                 size_t size,
-                                 uint64_t *bytes_sent);
+typedef int (QEMURamSaveFunc)(QEMUFile *f,
+                              ram_addr_t block_offset,
+                              ram_addr_t offset,
+                              size_t size);
 
 typedef struct QEMUFileHooks {
     QEMURamHookFunc *before_ram_iterate;
@@ -119,14 +118,6 @@ bool qemu_file_buffer_empty(QEMUFile *file);
  */
 int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset);
 void qemu_file_skip(QEMUFile *f, int size);
-/*
- * qemu_file_credit_transfer:
- *
- * Report on a number of bytes that have been transferred
- * out of band from the main file object I/O methods. This
- * accounting information tracks the total migration traffic.
- */
-void qemu_file_credit_transfer(QEMUFile *f, size_t size);
 int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp);
 void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err);
 void qemu_file_set_error(QEMUFile *f, int ret);
@@ -150,9 +141,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
 #define RAM_SAVE_CONTROL_NOT_SUPP -1000
 #define RAM_SAVE_CONTROL_DELAYED  -2000
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
-                             uint64_t *bytes_sent);
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size);
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);
 
 #endif
diff --git a/migration/ram.c b/migration/ram.c
index 0c202f8..e4bfd39 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1186,31 +1186,19 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block,
 static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
                               ram_addr_t offset, int *pages)
 {
-    uint64_t bytes_xmit = 0;
     int ret;
 
-    *pages = -1;
     ret = ram_control_save_page(pss->pss_channel, block->offset, offset,
-                                TARGET_PAGE_SIZE, &bytes_xmit);
+                                TARGET_PAGE_SIZE);
     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
         return false;
     }
 
-    if (bytes_xmit) {
-        ram_transferred_add(bytes_xmit);
-        *pages = 1;
-    }
-
     if (ret == RAM_SAVE_CONTROL_DELAYED) {
+        *pages = 1;
         return true;
     }
-
-    if (bytes_xmit > 0) {
-        stat64_add(&mig_stats.normal_pages, 1);
-    } else if (bytes_xmit == 0) {
-        stat64_add(&mig_stats.zero_pages, 1);
-    }
-
+    *pages = ret;
     return true;
 }
 
diff --git a/migration/rdma.c b/migration/rdma.c
index c78ddfc..7d2726d 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2029,7 +2029,7 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
  * If we're using dynamic registration on the dest-side, we have to
  * send a registration command first.
  */
-static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
+static int qemu_rdma_write_one(RDMAContext *rdma,
                                int current_index, uint64_t current_addr,
                                uint64_t length)
 {
@@ -2124,9 +2124,18 @@ retry:
                     return -EIO;
                 }
 
+                /*
+                 * TODO: Here we are sending something, but we are not
+                 * accounting for anything transferred.  The following is wrong:
+                 *
+                 * stat64_add(&mig_stats.rdma_bytes, sge.length);
+                 *
+                 * because we are using some kind of compression.  I
+                 * would think that head.len would be the more similar
+                 * thing to a correct value.
+                 */
                 stat64_add(&mig_stats.zero_pages,
                            sge.length / qemu_target_page_size());
-
                 return 1;
             }
 
@@ -2234,8 +2243,17 @@ retry:
 
     set_bit(chunk, block->transit_bitmap);
     stat64_add(&mig_stats.normal_pages, sge.length / qemu_target_page_size());
+    /*
+     * We are adding to transferred the amount of data written, but no
+     * overhead at all.  I will asume that RDMA is magicaly and don't
+     * need to transfer (at least) the addresses where it wants to
+     * write the pages.  Here it looks like it should be something
+     * like:
+     *     sizeof(send_wr) + sge.length
+     * but this being RDMA, who knows.
+     */
+    stat64_add(&mig_stats.rdma_bytes, sge.length);
     ram_transferred_add(sge.length);
-    qemu_file_credit_transfer(f, sge.length);
     rdma->total_writes++;
 
     return 0;
@@ -2247,7 +2265,7 @@ retry:
  * We support sending out multiple chunks at the same time.
  * Not all of them need to get signaled in the completion queue.
  */
-static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma)
+static int qemu_rdma_write_flush(RDMAContext *rdma)
 {
     int ret;
 
@@ -2255,7 +2273,7 @@ static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma)
         return 0;
     }
 
-    ret = qemu_rdma_write_one(f, rdma,
+    ret = qemu_rdma_write_one(rdma,
             rdma->current_index, rdma->current_addr, rdma->current_length);
 
     if (ret < 0) {
@@ -2328,7 +2346,7 @@ static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma,
  *    and only require that a batch gets acknowledged in the completion
  *    queue instead of each individual chunk.
  */
-static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma,
+static int qemu_rdma_write(RDMAContext *rdma,
                            uint64_t block_offset, uint64_t offset,
                            uint64_t len)
 {
@@ -2339,7 +2357,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma,
 
     /* If we cannot merge it, we flush the current buffer first. */
     if (!qemu_rdma_buffer_mergable(rdma, current_addr, len)) {
-        ret = qemu_rdma_write_flush(f, rdma);
+        ret = qemu_rdma_write_flush(rdma);
         if (ret) {
             return ret;
         }
@@ -2361,7 +2379,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma,
 
     /* flush it if buffer is too large */
     if (rdma->current_length >= RDMA_MERGE_MAX) {
-        return qemu_rdma_write_flush(f, rdma);
+        return qemu_rdma_write_flush(rdma);
     }
 
     return 0;
@@ -2782,7 +2800,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
                                        Error **errp)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
-    QEMUFile *f = rioc->file;
     RDMAContext *rdma;
     int ret;
     ssize_t done = 0;
@@ -2803,7 +2820,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
      * Push out any writes that
      * we're queued up for VM's ram.
      */
-    ret = qemu_rdma_write_flush(f, rdma);
+    ret = qemu_rdma_write_flush(rdma);
     if (ret < 0) {
         rdma->error_state = ret;
         error_setg(errp, "qemu_rdma_write_flush returned %d", ret);
@@ -2942,11 +2959,11 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
 /*
  * Block until all the outstanding chunks have been delivered by the hardware.
  */
-static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma)
+static int qemu_rdma_drain_cq(RDMAContext *rdma)
 {
     int ret;
 
-    if (qemu_rdma_write_flush(f, rdma) < 0) {
+    if (qemu_rdma_write_flush(rdma) < 0) {
         return -EIO;
     }
 
@@ -3225,13 +3242,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
  *
  *    @size : Number of bytes to transfer
  *
- *    @bytes_sent : User-specificed pointer to indicate how many bytes were
+ *    @pages_sent : User-specificed pointer to indicate how many pages were
  *                  sent. Usually, this will not be more than a few bytes of
  *                  the protocol because most transfers are sent asynchronously.
  */
-static size_t qemu_rdma_save_page(QEMUFile *f,
-                                  ram_addr_t block_offset, ram_addr_t offset,
-                                  size_t size, uint64_t *bytes_sent)
+static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
+                               ram_addr_t offset, size_t size)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     RDMAContext *rdma;
@@ -3257,25 +3273,13 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
      * is full, or the page doesn't belong to the current chunk,
      * an actual RDMA write will occur and a new chunk will be formed.
      */
-    ret = qemu_rdma_write(f, rdma, block_offset, offset, size);
+    ret = qemu_rdma_write(rdma, block_offset, offset, size);
     if (ret < 0) {
         error_report("rdma migration: write error! %d", ret);
         goto err;
     }
 
     /*
-     * We always return 1 bytes because the RDMA
-     * protocol is completely asynchronous. We do not yet know
-     * whether an  identified chunk is zero or not because we're
-     * waiting for other pages to potentially be merged with
-     * the current chunk. So, we have to call qemu_update_position()
-     * later on when the actual write occurs.
-     */
-    if (bytes_sent) {
-        *bytes_sent = 1;
-    }
-
-    /*
      * Drain the Completion Queue if possible, but do not block,
      * just poll.
      *
@@ -3914,7 +3918,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
     CHECK_ERROR_STATE();
 
     qemu_fflush(f);
-    ret = qemu_rdma_drain_cq(f, rdma);
+    ret = qemu_rdma_drain_cq(rdma);
 
     if (ret < 0) {
         goto err;
diff --git a/migration/trace-events b/migration/trace-events
index 4666f19..6348373 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -191,7 +191,7 @@ process_incoming_migration_co_postcopy_end_main(void) ""
 postcopy_preempt_enabled(bool value) "%d"
 
 # migration-stats
-migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd) "qemu_file %" PRIu64 " multifd %" PRIu64
+migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd, uint64_t rdma) "qemu_file %" PRIu64 " multifd %" PRIu64 " RDMA %" PRIu64
 
 # channel.c
 migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
author	Stefan Hajnoczi <stefanha@redhat.com>	2023-10-02 14:42:44 -0400
committer	Stefan Hajnoczi <stefanha@redhat.com>	2023-10-02 14:42:44 -0400
commit	50d0bfd0ed78209f003e8f7b9ac25edaa0399157 (patch)
tree	2d2f161f2f50cedf324dd95049686238aa9f28bb /migration
parent	5d7e601df37d8bdd490472fd4cfe3e4ca258df09 (diff)
parent	9c53d369e5903375a2e3358f739be77dcb8dae49 (diff)
download	qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.zip qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.tar.gz qemu-50d0bfd0ed78209f003e8f7b9ac25edaa0399157.tar.bz2