From d0895d6e390d59d3dc6edab771335adfea263029 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 1 Mar 2012 12:21:43 +0100
Subject: Group snapshot: Fix format name for backing file

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index d78aa51..96a893b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -800,7 +800,8 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
         /* create new image w/backing file */
         ret = bdrv_img_create(snapshot_file, format,
                               states->old_bs->filename,
-                              drv->format_name, NULL, -1, flags);
+                              states->old_bs->drv->format_name,
+                              NULL, -1, flags);
         if (ret) {
             error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
             goto delete_and_fail;
-- 
cgit v1.1


From 14fe292d86da90b79e2fb56a4986d27346339a00 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 27 Feb 2012 13:16:01 +0000
Subject: qed: do not evict in-use L2 table cache entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The L2 table cache reduces QED metadata reads that would be required
when translating LBAs to offsets into the image file.  Since requests
execute in parallel it is possible to share an L2 table between multiple
requests.

There is a potential data corruption issue when an in-use L2 table is
evicted from the cache because the following situation occurs:

  1. An allocating write performs an update to L2 table "A".

  2. Another request needs L2 table "B" and causes table "A" to be
     evicted.

  3. A new read request needs L2 table "A" but it is not cached.

As a result the L2 update from #1 can overlap with the L2 fetch from #3.
We must avoid doing overlapping I/O requests here since the worst case
outcome is that the L2 fetch completes before the L2 update and yields
stale data.  In that case we would effectively discard the L2 update and
lose data clusters!

Thanks to Benoît Canet <benoit.canet@gmail.com> for extensive testing
and debugging which lead to discovery of this bug.

Reported-by: Benoît Canet <benoit.canet@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Tested-by: Benoît Canet <benoit.canet@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qed-l2-cache.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index 02b81a2..e9b2aae 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
         return;
     }
 
+    /* Evict an unused cache entry so we have space.  If all entries are in use
+     * we can grow the cache temporarily and we try to shrink back down later.
+     */
     if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
-        entry = QTAILQ_FIRST(&l2_cache->entries);
-        QTAILQ_REMOVE(&l2_cache->entries, entry, node);
-        l2_cache->n_entries--;
-        qed_unref_l2_cache_entry(entry);
+        CachedL2Table *next;
+        QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+            if (entry->ref > 1) {
+                continue;
+            }
+
+            QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+            l2_cache->n_entries--;
+            qed_unref_l2_cache_entry(entry);
+
+            /* Stop evicting when we've shrunk back to max size */
+            if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+                break;
+            }
+        }
     }
 
     l2_cache->n_entries++;
-- 
cgit v1.1


From 3cce16f44dc51b8695a144b7b0437548f886276e Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 1 Mar 2012 18:36:21 +0100
Subject: qcow2: Add some tracing

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2-cache.c   | 18 ++++++++++++++++++
 block/qcow2-cluster.c | 15 ++++++++++++++-
 block/qcow2.c         |  9 +++++++++
 trace-events          | 24 ++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 340a6f2..710d4b1 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -25,6 +25,7 @@
 #include "block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
+#include "trace.h"
 
 typedef struct Qcow2CachedTable {
     void*   table;
@@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         return 0;
     }
 
+    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+                                  c == s->l2_table_cache, i);
+
     if (c->depends) {
         ret = qcow2_cache_flush_dependency(bs, c);
     } else if (c->depends_on_flush) {
@@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
 {
+    BDRVQcowState *s = bs->opaque;
     int result = 0;
     int ret;
     int i;
 
+    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
     for (i = 0; i < c->size; i++) {
         ret = qcow2_cache_entry_flush(bs, c, i);
         if (ret < 0 && result != -ENOSPC) {
@@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     int i;
     int ret;
 
+    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+                          offset, read_from_disk);
+
     /* Check if the table is already cached */
     for (i = 0; i < c->size; i++) {
         if (c->entries[i].offset == offset) {
@@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 
     /* If not, write a table back and replace it */
     i = qcow2_cache_find_entry_to_replace(c);
+    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+                                        c == s->l2_table_cache, i);
     if (i < 0) {
         return i;
     }
@@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
         return ret;
     }
 
+    trace_qcow2_cache_get_read(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
     c->entries[i].offset = 0;
     if (read_from_disk) {
         if (c == s->l2_table_cache) {
@@ -258,6 +272,10 @@ found:
     c->entries[i].cache_hits++;
     c->entries[i].ref++;
     *table = c->entries[i].table;
+
+    trace_qcow2_cache_get_done(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
+
     return 0;
 }
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 07a2e93..a791bbe 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,6 +27,7 @@
 #include "qemu-common.h"
 #include "block_int.h"
 #include "block/qcow2.h"
+#include "trace.h"
 
 int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
 {
@@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     old_l2_offset = s->l1_table[l1_index];
 
+    trace_qcow2_l2_allocate(bs, l1_index);
+
     /* allocate a new l2 entry */
 
     l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
@@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     /* allocate a new entry in the l2 cache */
 
+    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
     ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
     if (ret < 0) {
         return ret;
@@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
+    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
@@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     }
 
     /* update the L1 entry */
+    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
     s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
     ret = write_l1_entry(bs, l1_index);
     if (ret < 0) {
@@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     }
 
     *table = l2_table;
+    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
     return 0;
 
 fail:
+    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
     qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
     s->l1_table[l1_index] = old_l2_offset;
     return ret;
@@ -584,6 +592,8 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     uint64_t cluster_offset = m->cluster_offset;
     bool cow = false;
 
+    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+
     if (m->nb_clusters == 0)
         return 0;
 
@@ -695,6 +705,9 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     unsigned int nb_clusters, i = 0;
     QCowL2Meta *old_alloc;
 
+    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+                                      n_start, n_end);
+
     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         return ret;
@@ -793,7 +806,7 @@ again:
     QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
 
     /* allocate a new cluster */
-
+    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
     cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
     if (cluster_offset < 0) {
         ret = cluster_offset;
diff --git a/block/qcow2.c b/block/qcow2.c
index eb5ea48..6f53ec6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -29,6 +29,7 @@
 #include "block/qcow2.h"
 #include "qemu-error.h"
 #include "qerror.h"
+#include "trace.h"
 
 /*
   Differences with QCOW:
@@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         .nb_clusters = 0,
     };
 
+    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+                                 remaining_sectors);
+
     qemu_co_queue_init(&l2meta.dependent_requests);
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     while (remaining_sectors != 0) {
 
+        trace_qcow2_writev_start_part(qemu_coroutine_self());
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         n_end = index_in_cluster + remaining_sectors;
         if (s->crypt_method &&
@@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         qemu_co_mutex_unlock(&s->lock);
+        trace_qcow2_writev_data(qemu_coroutine_self(),
+                                (cluster_offset >> 9) + index_in_cluster);
         ret = bdrv_co_writev(bs->file,
                              (cluster_offset >> 9) + index_in_cluster,
                              cur_nr_sectors, &hd_qiov);
@@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         remaining_sectors -= cur_nr_sectors;
         sector_num += cur_nr_sectors;
         bytes_done += cur_nr_sectors * 512;
+        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
     }
     ret = 0;
 
@@ -647,6 +655,7 @@ fail:
 
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
+    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
 
     return ret;
 }
diff --git a/trace-events b/trace-events
index c5d0f0f..d818ff1 100644
--- a/trace-events
+++ b/trace-events
@@ -312,6 +312,30 @@ scsi_request_sense(int target, int lun, int tag) "target %d lun %d tag %d"
 # vl.c
 vm_state_notify(int running, int reason) "running %d reason %d"
 
+# block/qcow2.c
+qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d"
+qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
+qcow2_writev_start_part(void *co) "co %p"
+qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
+qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
+
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d"
+qcow2_cluster_alloc_phys(void *co) "co %p"
+qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
+
+qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d"
+
+qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d"
+qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
+qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+
 # block/qed-l2-cache.c
 qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
 qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"
-- 
cgit v1.1


From e88774971c33671477c9eb4a4cf1e65a047c9838 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 5 Mar 2012 18:10:11 +0000
Subject: block: handle -EBUSY in bdrv_commit_all()

Monitor operations that manipulate image files must not execute while a
background job (like image streaming) is in progress.  This prevents
corruptions from happening when two pieces of code are manipulating the
image file without knowledge of each other.

The monitor "commit" command raises QERR_DEVICE_IN_USE when
bdrv_commit() returns -EBUSY but "commit all" has no error handling.
This is easy to fix, although note that we do not deliver a detailed
error about which device was busy in the "commit all" case.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c    | 8 ++++++--
 block.h    | 2 +-
 blockdev.c | 9 ++++++---
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 52ffe14..b88ee90 100644
--- a/block.c
+++ b/block.c
@@ -1244,13 +1244,17 @@ ro_cleanup:
     return ret;
 }
 
-void bdrv_commit_all(void)
+int bdrv_commit_all(void)
 {
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        bdrv_commit(bs);
+        int ret = bdrv_commit(bs);
+        if (ret < 0) {
+            return ret;
+        }
     }
+    return 0;
 }
 
 struct BdrvTrackedRequest {
diff --git a/block.h b/block.h
index 48d0bf3..415bb17 100644
--- a/block.h
+++ b/block.h
@@ -165,7 +165,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs);
 int bdrv_commit(BlockDriverState *bs);
-void bdrv_commit_all(void);
+int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
diff --git a/blockdev.c b/blockdev.c
index 96a893b..0e8666a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -627,12 +627,15 @@ void do_commit(Monitor *mon, const QDict *qdict)
 {
     const char *device = qdict_get_str(qdict, "device");
     BlockDriverState *bs;
+    int ret;
 
     if (!strcmp(device, "all")) {
-        bdrv_commit_all();
+        ret = bdrv_commit_all();
+        if (ret == -EBUSY) {
+            qerror_report(QERR_DEVICE_IN_USE, device);
+            return;
+        }
     } else {
-        int ret;
-
         bs = bdrv_find(device);
         if (!bs) {
             qerror_report(QERR_DEVICE_NOT_FOUND, device);
-- 
cgit v1.1


From 259b21731050eccfe1fd4da949a9633ec7547b04 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 6 Mar 2012 12:44:45 +0100
Subject: qcow2: Add error messages in qcow2_truncate

qemu-img resize has some limitations with qcow2, but the user is only
told that "this image format does not support resize". Quite confusing,
so add some more detailed error_report() calls and change "this image
format" into "this image".

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2.c | 3 +++
 qemu-img.c    | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6f53ec6..7aece65 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1120,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
     int ret, new_l1_size;
 
     if (offset & 511) {
+        error_report("The new size must be a multiple of 512");
         return -EINVAL;
     }
 
     /* cannot proceed if image has snapshots */
     if (s->nb_snapshots) {
+        error_report("Can't resize an image which has snapshots");
         return -ENOTSUP;
     }
 
     /* shrinking is currently not supported */
     if (offset < bs->total_sectors * 512) {
+        error_report("qcow2 doesn't support shrinking images yet");
         return -ENOTSUP;
     }
 
diff --git a/qemu-img.c b/qemu-img.c
index 8df3564..0e48b35 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1614,7 +1614,7 @@ static int img_resize(int argc, char **argv)
         printf("Image resized.\n");
         break;
     case -ENOTSUP:
-        error_report("This image format does not support resize");
+        error_report("This image does not support resize");
         break;
     case -EACCES:
         error_report("Image is read-only");
-- 
cgit v1.1


From 3811f63acdc0584e053d09f7070063ed0a9bafe6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 8 Mar 2012 12:26:52 +0100
Subject: qemu-iotests: Mark some tests as quick

This creates a new test group 'quick' for some test case that take at
most a couple of seconds each, so that the group can be run during a
quick 'make check'

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/group | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index fcf869d..b549f10 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -8,32 +8,32 @@
 # test-group association ... one line per test
 #
 001 rw auto
-002 rw auto
+002 rw auto quick
 003 rw auto
-004 rw auto
+004 rw auto quick
 005 img auto
 006 img auto
 007 snapshot auto
 008 rw auto
 009 rw auto
 010 rw auto
-011 rw auto
-012 auto
+011 rw auto quick
+012 auto quick
 013 rw auto
 014 rw auto
 015 rw snapshot auto
-016 rw auto
-017 rw backing auto
+016 rw auto quick
+017 rw backing auto quick
 018 rw backing auto
-019 rw backing auto
-020 rw backing auto
+019 rw backing auto quick
+020 rw backing auto quick
 021 io auto
 022 rw snapshot auto
 023 rw auto
-024 rw backing auto
-025 rw auto
+024 rw backing auto quick
+025 rw auto quick
 026 rw blkdbg auto
-027 rw auto
+027 rw auto quick
 028 rw backing auto
-029 rw auto
+029 rw auto quick
 030 rw auto
-- 
cgit v1.1


From 8959449bb9b16e3b19cf35823d3358eb7d461210 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 8 Mar 2012 12:29:00 +0100
Subject: make check: Add qemu-iotests subset

Run the 'quick' group from qemu-iotests during 'make check'.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/Makefile              |  5 +++++
 tests/qemu-iotests-quick.sh | 17 +++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100755 tests/qemu-iotests-quick.sh

diff --git a/tests/Makefile b/tests/Makefile
index 74b29dc..571ad42 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,6 +1,9 @@
+export SRC_PATH
+
 CHECKS = check-qdict check-qfloat check-qint check-qstring check-qlist
 CHECKS += check-qjson test-qmp-output-visitor test-qmp-input-visitor
 CHECKS += test-string-input-visitor test-string-output-visitor test-coroutine
+CHECKS += $(SRC_PATH)/tests/qemu-iotests-quick.sh
 
 check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS)
 
@@ -42,6 +45,8 @@ test-qmp-input-visitor: test-qmp-input-visitor.o $(qobject-obj-y) $(qapi-obj-y)
 test-qmp-commands.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h test-qmp-marshal.c test-qmp-commands.h) $(qapi-obj-y)
 test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o
 
+$(SRC_PATH)/tests/qemu-iotests-quick.sh: qemu-img qemu-io
+
 .PHONY: check
 check: $(CHECKS)
 	$(call quiet-command, gtester $(CHECKS), "  CHECK")
diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
new file mode 100755
index 0000000..cf90de0
--- /dev/null
+++ b/tests/qemu-iotests-quick.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# We don't know which of the system emulator binaries there is (or if there is
+# any at all), so the 'quick' group doesn't contain any tests that require
+# running qemu proper. Assign a fake binary name so that qemu-iotests doesn't
+# complain about the missing binary.
+export QEMU_PROG="this_should_be_unused"
+
+export QEMU_IMG_PROG="$(pwd)/qemu-img"
+export QEMU_IO_PROG="$(pwd)/qemu-io"
+
+cd $SRC_PATH/tests/qemu-iotests
+
+ret=0
+./check -T -nocache -qcow2 -g quick || ret=1
+
+exit $ret
-- 
cgit v1.1


From b8c6f29eb84cd3ccbbf23a5951224ae33f11116b Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 9 Mar 2012 13:37:40 +0100
Subject: Add 'make check-block'

Runs the full qemu-iotests suite for various image formats.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/Makefile       |  7 ++++++-
 tests/check-block.sh | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100755 tests/check-block.sh

diff --git a/tests/Makefile b/tests/Makefile
index 571ad42..c78ade1 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -47,6 +47,11 @@ test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-ob
 
 $(SRC_PATH)/tests/qemu-iotests-quick.sh: qemu-img qemu-io
 
-.PHONY: check
+
+.PHONY: check check-block
+
 check: $(CHECKS)
 	$(call quiet-command, gtester $(CHECKS), "  CHECK")
+
+check-block:
+	$(call quiet-command, $(SHELL) $(SRC_PATH)/tests/check-block.sh , "  CHECK")
diff --git a/tests/check-block.sh b/tests/check-block.sh
new file mode 100755
index 0000000..b9d9c6a
--- /dev/null
+++ b/tests/check-block.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+export QEMU_PROG="$(pwd)/x86_64-softmmu/qemu-system-x86_64"
+export QEMU_IMG_PROG="$(pwd)/qemu-img"
+export QEMU_IO_PROG="$(pwd)/qemu-io"
+
+if [ ! -x $QEMU_PROG ]; then
+    echo "'make check-block' requires qemu-system-x86_64"
+    exit 1
+fi
+
+cd $SRC_PATH/tests/qemu-iotests
+
+ret=0
+./check -T -nocache -raw || ret=1
+./check -T -nocache -qcow2 || ret=1
+./check -T -nocache -qed|| ret=1
+./check -T -nocache -vmdk|| ret=1
+./check -T -nocache -vpc || ret=1
+
+exit $ret
-- 
cgit v1.1


From 622d241998b6a981483594712b039190ee94eff8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Mar 2012 18:55:54 +0100
Subject: use QSIMPLEQ_FOREACH_SAFE when freeing list elements

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0e8666a..3abc1da 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -736,7 +736,7 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
     int ret = 0;
     SnapshotDevList *dev_entry = dev_list;
     SnapshotDev *dev_info = NULL;
-    BlkGroupSnapshotStates *states;
+    BlkGroupSnapshotStates *states, *next;
     BlockDriver *proto_drv;
     BlockDriver *drv;
     int flags;
@@ -842,7 +842,7 @@ delete_and_fail:
         }
     }
 exit:
-    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
+    QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
         g_free(states);
     }
     return;
-- 
cgit v1.1


From dc8fb6df5a13f7231f88c78966f8a6c5306840b5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Mar 2012 18:55:56 +0100
Subject: qapi: complete implementation of unions

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qapi-schema-test.json     | 10 ++++++++++
 scripts/qapi-types.py     |  6 ++++++
 scripts/qapi-visit.py     | 31 ++++++++++++++++++++++++++++++-
 test-qmp-input-visitor.c  | 18 ++++++++++++++++++
 test-qmp-output-visitor.c | 34 ++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/qapi-schema-test.json b/qapi-schema-test.json
index 2b38919..8c7f9f7 100644
--- a/qapi-schema-test.json
+++ b/qapi-schema-test.json
@@ -22,6 +22,16 @@
                        'dict2': { 'userdef1': 'UserDefOne', 'string2': 'str' },
                        '*dict3': { 'userdef2': 'UserDefOne', 'string3': 'str' } } } }
 
+# for testing unions
+{ 'type': 'UserDefA',
+  'data': { 'boolean': 'bool' } }
+
+{ 'type': 'UserDefB',
+  'data': { 'integer': 'int' } }
+
+{ 'union': 'UserDefUnion',
+  'data': { 'a' : 'UserDefA', 'b' : 'UserDefB' } }
+
 # testing commands
 { 'command': 'user_def_cmd', 'data': {} }
 { 'command': 'user_def_cmd1', 'data': {'ud1a': 'UserDefOne'} }
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index b56225b..727fb77 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -117,6 +117,7 @@ struct %(name)s
 {
     %(name)sKind kind;
     union {
+        void *data;
 ''',
                 name=name)
 
@@ -269,6 +270,7 @@ for expr in exprs:
     elif expr.has_key('union'):
         ret += generate_fwd_struct(expr['union'], expr['data']) + "\n"
         ret += generate_enum('%sKind' % expr['union'], expr['data'].keys())
+        fdef.write(generate_enum_lookup('%sKind' % expr['union'], expr['data'].keys()))
     else:
         continue
     fdecl.write(ret)
@@ -283,6 +285,10 @@ for expr in exprs:
         fdef.write(generate_type_cleanup(expr['type']) + "\n")
     elif expr.has_key('union'):
         ret += generate_union(expr['union'], expr['data'])
+        ret += generate_type_cleanup_decl(expr['union'] + "List")
+        fdef.write(generate_type_cleanup(expr['union'] + "List") + "\n")
+        ret += generate_type_cleanup_decl(expr['union'])
+        fdef.write(generate_type_cleanup(expr['union']) + "\n")
     else:
         continue
     fdecl.write(ret)
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 5160d83..54117d4 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -110,10 +110,38 @@ def generate_visit_union(name, members):
 
 void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error **errp)
 {
-}
+    Error *err = NULL;
+
+    visit_start_struct(m, (void **)obj, "%(name)s", name, sizeof(%(name)s), &err);
+    visit_type_%(name)sKind(m, &(*obj)->kind, "type", &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto end;
+    }
+    switch ((*obj)->kind) {
 ''',
                  name=name)
 
+    for key in members:
+        ret += mcgen('''
+    case %(abbrev)s_KIND_%(enum)s:
+        visit_type_%(c_type)s(m, &(*obj)->%(c_name)s, "data", errp);
+        break;
+''',
+                abbrev = de_camel_case(name).upper(),
+                enum = de_camel_case(key).upper(),
+                c_type=members[key],
+                c_name=c_var(key))
+
+    ret += mcgen('''
+    default:
+        abort();
+    }
+end:
+    visit_end_struct(m, errp);
+}
+''')
+
     return ret
 
 def generate_declaration(name, members, genlist=True):
@@ -242,6 +270,7 @@ for expr in exprs:
         fdecl.write(ret)
     elif expr.has_key('union'):
         ret = generate_visit_union(expr['union'], expr['data'])
+        ret += generate_visit_list(expr['union'], expr['data'])
         fdef.write(ret)
 
         ret = generate_decl_enum('%sKind' % expr['union'], expr['data'].keys())
diff --git a/test-qmp-input-visitor.c b/test-qmp-input-visitor.c
index 926db5c..1996e49 100644
--- a/test-qmp-input-visitor.c
+++ b/test-qmp-input-visitor.c
@@ -234,6 +234,22 @@ static void test_visitor_in_list(TestInputVisitorData *data,
     qapi_free_UserDefOneList(head);
 }
 
+static void test_visitor_in_union(TestInputVisitorData *data,
+                                  const void *unused)
+{
+    Visitor *v;
+    Error *err = NULL;
+    UserDefUnion *tmp;
+
+    v = visitor_input_test_init(data, "{ 'type': 'b', 'data' : { 'integer': 42 } }");
+
+    visit_type_UserDefUnion(v, &tmp, NULL, &err);
+    g_assert(err == NULL);
+    g_assert_cmpint(tmp->kind, ==, USER_DEF_UNION_KIND_B);
+    g_assert_cmpint(tmp->b->integer, ==, 42);
+    qapi_free_UserDefUnion(tmp);
+}
+
 static void input_visitor_test_add(const char *testpath,
                                    TestInputVisitorData *data,
                                    void (*test_func)(TestInputVisitorData *data, const void *user_data))
@@ -264,6 +280,8 @@ int main(int argc, char **argv)
                             &in_visitor_data, test_visitor_in_struct_nested);
     input_visitor_test_add("/visitor/input/list",
                             &in_visitor_data, test_visitor_in_list);
+    input_visitor_test_add("/visitor/input/union",
+                            &in_visitor_data, test_visitor_in_union);
 
     g_test_run();
 
diff --git a/test-qmp-output-visitor.c b/test-qmp-output-visitor.c
index 5452cd4..4d6c4d4 100644
--- a/test-qmp-output-visitor.c
+++ b/test-qmp-output-visitor.c
@@ -380,6 +380,38 @@ static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data,
     qapi_free_UserDefNestedList(head);
 }
 
+static void test_visitor_out_union(TestOutputVisitorData *data,
+                                   const void *unused)
+{
+    QObject *arg, *qvalue;
+    QDict *qdict, *value;
+
+    Error *err = NULL;
+
+    UserDefUnion *tmp = g_malloc0(sizeof(UserDefUnion));
+    tmp->kind = USER_DEF_UNION_KIND_A;
+    tmp->a = g_malloc0(sizeof(UserDefA));
+    tmp->a->boolean = true;
+
+    visit_type_UserDefUnion(data->ov, &tmp, NULL, &err);
+    g_assert(err == NULL);
+    arg = qmp_output_get_qobject(data->qov);
+
+    g_assert(qobject_type(arg) == QTYPE_QDICT);
+    qdict = qobject_to_qdict(arg);
+
+    g_assert_cmpstr(qdict_get_str(qdict, "type"), ==, "a");
+
+    qvalue = qdict_get(qdict, "data");
+    g_assert(data != NULL);
+    g_assert(qobject_type(qvalue) == QTYPE_QDICT);
+    value = qobject_to_qdict(qvalue);
+    g_assert_cmpint(qdict_get_bool(value, "boolean"), ==, true);
+
+    qapi_free_UserDefUnion(tmp);
+    QDECREF(qdict);
+}
+
 static void output_visitor_test_add(const char *testpath,
                                     TestOutputVisitorData *data,
                                     void (*test_func)(TestOutputVisitorData *data, const void *user_data))
@@ -416,6 +448,8 @@ int main(int argc, char **argv)
                             &out_visitor_data, test_visitor_out_list);
     output_visitor_test_add("/visitor/output/list-qapi-free",
                             &out_visitor_data, test_visitor_out_list_qapi_free);
+    output_visitor_test_add("/visitor/output/union",
+                            &out_visitor_data, test_visitor_out_union);
 
     g_test_run();
 
-- 
cgit v1.1


From 52e7c241ac766406f05fa331eec9dbb33ebd2640 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Mar 2012 18:55:57 +0100
Subject: rename blockdev-group-snapshot-sync

We will add other kinds of operation.  Prepare for this by adjusting
the schema.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c       | 78 +++++++++++++++++++++++++++++---------------------------
 qapi-schema.json | 42 +++++++++++++++++++-----------
 qmp-commands.hx  | 52 ++++++++++++++++++++-----------------
 3 files changed, 96 insertions(+), 76 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 3abc1da..88730c1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -719,31 +719,24 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
 
 
 /* New and old BlockDriverState structs for group snapshots */
-typedef struct BlkGroupSnapshotStates {
+typedef struct BlkTransactionStates {
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
-    QSIMPLEQ_ENTRY(BlkGroupSnapshotStates) entry;
-} BlkGroupSnapshotStates;
+    QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
+} BlkTransactionStates;
 
 /*
  * 'Atomic' group snapshots.  The snapshots are taken as a set, and if any fail
  *  then we do not pivot any of the devices in the group, and abandon the
  *  snapshots
  */
-void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
-                                      Error **errp)
+void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 {
     int ret = 0;
-    SnapshotDevList *dev_entry = dev_list;
-    SnapshotDev *dev_info = NULL;
-    BlkGroupSnapshotStates *states, *next;
-    BlockDriver *proto_drv;
-    BlockDriver *drv;
-    int flags;
-    const char *format;
-    const char *snapshot_file;
+    BlockdevActionList *dev_entry = dev_list;
+    BlkTransactionStates *states, *next;
 
-    QSIMPLEQ_HEAD(snap_bdrv_states, BlkGroupSnapshotStates) snap_bdrv_states;
+    QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
     QSIMPLEQ_INIT(&snap_bdrv_states);
 
     /* drain all i/o before any snapshots */
@@ -751,21 +744,46 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
 
     /* We don't do anything in this loop that commits us to the snapshot */
     while (NULL != dev_entry) {
+        BlockdevAction *dev_info = NULL;
+        BlockDriver *proto_drv;
+        BlockDriver *drv;
+        int flags;
+        const char *device;
+        const char *format = "qcow2";
+        const char *new_image_file = NULL;
+
         dev_info = dev_entry->value;
         dev_entry = dev_entry->next;
 
-        states = g_malloc0(sizeof(BlkGroupSnapshotStates));
+        states = g_malloc0(sizeof(BlkTransactionStates));
         QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
 
-        states->old_bs = bdrv_find(dev_info->device);
+        switch (dev_info->kind) {
+        case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+            device = dev_info->blockdev_snapshot_sync->device;
+            if (dev_info->blockdev_snapshot_sync->has_format) {
+                format = dev_info->blockdev_snapshot_sync->format;
+            }
+            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
+            break;
+        default:
+            abort();
+        }
+
+        drv = bdrv_find_format(format);
+        if (!drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            goto delete_and_fail;
+        }
 
+        states->old_bs = bdrv_find(device);
         if (!states->old_bs) {
-            error_set(errp, QERR_DEVICE_NOT_FOUND, dev_info->device);
+            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
             goto delete_and_fail;
         }
 
         if (bdrv_in_use(states->old_bs)) {
-            error_set(errp, QERR_DEVICE_IN_USE, dev_info->device);
+            error_set(errp, QERR_DEVICE_IN_USE, device);
             goto delete_and_fail;
         }
 
@@ -778,44 +796,30 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
             }
         }
 
-        snapshot_file = dev_info->snapshot_file;
-
         flags = states->old_bs->open_flags;
 
-        if (!dev_info->has_format) {
-            format = "qcow2";
-        } else {
-            format = dev_info->format;
-        }
-
-        drv = bdrv_find_format(format);
-        if (!drv) {
-            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-            goto delete_and_fail;
-        }
-
-        proto_drv = bdrv_find_protocol(snapshot_file);
+        proto_drv = bdrv_find_protocol(new_image_file);
         if (!proto_drv) {
             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
             goto delete_and_fail;
         }
 
         /* create new image w/backing file */
-        ret = bdrv_img_create(snapshot_file, format,
+        ret = bdrv_img_create(new_image_file, format,
                               states->old_bs->filename,
                               states->old_bs->drv->format_name,
                               NULL, -1, flags);
         if (ret) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
             goto delete_and_fail;
         }
 
         /* We will manually add the backing_hd field to the bs later */
         states->new_bs = bdrv_new("");
-        ret = bdrv_open(states->new_bs, snapshot_file,
+        ret = bdrv_open(states->new_bs, new_image_file,
                         flags | BDRV_O_NO_BACKING, drv);
         if (ret != 0) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
             goto delete_and_fail;
         }
     }
diff --git a/qapi-schema.json b/qapi-schema.json
index 5f293c4..85de38e 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1118,7 +1118,7 @@
 { 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }}
 
 ##
-# @SnapshotDev
+# @BlockdevSnapshot
 #
 # @device:  the name of the device to generate the snapshot from.
 #
@@ -1126,19 +1126,30 @@
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 ##
-{ 'type': 'SnapshotDev',
-  'data': {'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+{ 'type': 'BlockdevSnapshot',
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+
+##
+# @BlockdevAction
+#
+# A discriminated record of operations that can be performed with
+# @transaction.
+##
+{ 'union': 'BlockdevAction',
+  'data': {
+       'blockdev-snapshot-sync': 'BlockdevSnapshot',
+   } }
 
 ##
-# @blockdev-group-snapshot-sync
+# @transaction
 #
-# Generates a synchronous snapshot of a group of one or more block devices,
-# as atomically as possible.  If the snapshot of any device in the group
-# fails, then the entire group snapshot will be abandoned and the
-# appropriate error returned.
+# Atomically operate on a group of one or more block devices.  If
+# any operation fails, then the entire set of actions will be
+# abandoned and the appropriate error returned.  The only operation
+# supported is currently blockdev-snapshot-sync.
 #
 #  List of:
-#  @SnapshotDev: information needed for the device snapshot
+#  @BlockdevAction: information needed for the device snapshot
 #
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
@@ -1147,13 +1158,14 @@
 #          If @snapshot-file can't be opened, OpenFileFailed
 #          If @format is invalid, InvalidBlockFormat
 #
-# Note: The group snapshot attempt returns failure on the first snapshot
-# device failure.  Therefore, there will be only one device or snapshot file
-# returned in an error condition, and subsequent devices will not have been
-# attempted.
+# Note: The transaction aborts on the first failure.  Therefore, there will
+# be only one device or snapshot file returned in an error condition, and
+# subsequent actions will not have been attempted.
+#
+# Since 1.1
 ##
-{ 'command': 'blockdev-group-snapshot-sync',
-  'data': { 'devlist': [ 'SnapshotDev' ] } }
+{ 'command': 'transaction',
+  'data': { 'actions': [ 'BlockdevAction' ] } }
 
 ##
 # @blockdev-snapshot-sync
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 0c9bfac..fb4f1df 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -687,41 +687,45 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
     },
     {
-        .name       = "blockdev-group-snapshot-sync",
-        .args_type  = "devlist:O",
-        .params  = "device:B,snapshot-file:s,format:s?",
-        .mhandler.cmd_new = qmp_marshal_input_blockdev_group_snapshot_sync,
+        .name       = "transaction",
+        .args_type  = "actions:O",
+        .mhandler.cmd_new = qmp_marshal_input_transaction,
     },
 
 SQMP
-blockdev-group-snapshot-sync
-----------------------
-
-Synchronous snapshot of one or more block devices.  A list array input
-is accepted, that contains the device and snapshot file information for
-each device in group. The default format, if not specified, is qcow2.
+transaction
+-----------
 
-If there is any failure creating or opening a new snapshot, all snapshots
-for the group are abandoned, and the original disks pre-snapshot attempt
-are used.
+Atomically operate on one or more block devices.  The only supported
+operation for now is snapshotting.  If there is any failure performing
+any of the operations, all snapshots for the group are abandoned, and
+the original disks pre-snapshot attempt are used.
 
+A list of dictionaries is accepted, that contains the actions to be performed.
+For snapshots this is the device, the file to use for the new snapshot,
+and the format.  The default format, if not specified, is qcow2.
 
 Arguments:
 
-devlist array:
-    - "device": device name to snapshot (json-string)
-    - "snapshot-file": name of new image file (json-string)
-    - "format": format of new image (json-string, optional)
+actions array:
+    - "type": the operation to perform.  The only supported
+      value is "blockdev-snapshot-sync". (json-string)
+    - "data": a dictionary.  The contents depend on the value
+      of "type".  When "type" is "blockdev-snapshot-sync":
+      - "device": device name to snapshot (json-string)
+      - "snapshot-file": name of new image file (json-string)
+      - "format": format of new image (json-string, optional)
 
 Example:
 
--> { "execute": "blockdev-group-snapshot-sync", "arguments":
-                      { "devlist": [{ "device": "ide-hd0",
-                                      "snapshot-file": "/some/place/my-image",
-                                      "format": "qcow2" },
-                                    { "device": "ide-hd1",
-                                      "snapshot-file": "/some/place/my-image2",
-                                      "format": "qcow2" }] } }
+-> { "execute": "transaction",
+     "arguments": { "actions": [
+         { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0",
+                                         "snapshot-file": "/some/place/my-image",
+                                         "format": "qcow2" } },
+         { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1",
+                                         "snapshot-file": "/some/place/my-image2",
+                                         "format": "qcow2" } } ] } }
 <- { "return": {} }
 
 EQMP
-- 
cgit v1.1


From bc8b094feb61c5f3ad55113f1c9b3288dd843b10 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Mar 2012 18:55:58 +0100
Subject: add mode field to blockdev-snapshot-sync transaction item

The mode field lets a management application create the snapshot
destination outside QEMU.

Right now, the only modes are "existing" and "absolute-paths".  Mirroring
introduces "no-backing-file".  In the future "relative-paths" could be
implemented too.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c       | 25 ++++++++++++++++---------
 qapi-schema.json | 19 ++++++++++++++++++-
 qmp-commands.hx  | 10 ++++++++++
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 88730c1..0a6edc3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -748,9 +748,10 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
         BlockDriver *proto_drv;
         BlockDriver *drv;
         int flags;
+        enum NewImageMode mode;
+        const char *new_image_file;
         const char *device;
         const char *format = "qcow2";
-        const char *new_image_file = NULL;
 
         dev_info = dev_entry->value;
         dev_entry = dev_entry->next;
@@ -761,10 +762,14 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
         switch (dev_info->kind) {
         case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
             device = dev_info->blockdev_snapshot_sync->device;
+            if (!dev_info->blockdev_snapshot_sync->has_mode) {
+                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+            }
+            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
             if (dev_info->blockdev_snapshot_sync->has_format) {
                 format = dev_info->blockdev_snapshot_sync->format;
             }
-            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
+            mode = dev_info->blockdev_snapshot_sync->mode;
             break;
         default:
             abort();
@@ -805,13 +810,15 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
         }
 
         /* create new image w/backing file */
-        ret = bdrv_img_create(new_image_file, format,
-                              states->old_bs->filename,
-                              states->old_bs->drv->format_name,
-                              NULL, -1, flags);
-        if (ret) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
-            goto delete_and_fail;
+        if (mode != NEW_IMAGE_MODE_EXISTING) {
+            ret = bdrv_img_create(new_image_file, format,
+                                  states->old_bs->filename,
+                                  states->old_bs->drv->format_name,
+                                  NULL, -1, flags);
+            if (ret) {
+                error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+                goto delete_and_fail;
+            }
         }
 
         /* We will manually add the backing_hd field to the bs later */
diff --git a/qapi-schema.json b/qapi-schema.json
index 85de38e..0882f43 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1118,6 +1118,22 @@
 { 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }}
 
 ##
+# @NewImageMode
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+# for the backing file.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode'
+  'data': [ 'existing', 'absolute-paths' ] }
+
+##
 # @BlockdevSnapshot
 #
 # @device:  the name of the device to generate the snapshot from.
@@ -1127,7 +1143,8 @@
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 ##
 { 'type': 'BlockdevSnapshot',
-  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
+            '*mode': 'NewImageMode' } }
 
 ##
 # @BlockdevAction
diff --git a/qmp-commands.hx b/qmp-commands.hx
index fb4f1df..7c03b62 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -705,6 +705,13 @@ A list of dictionaries is accepted, that contains the actions to be performed.
 For snapshots this is the device, the file to use for the new snapshot,
 and the format.  The default format, if not specified, is qcow2.
 
+Each new snapshot defaults to being created by QEMU (wiping any
+contents if the file already exists), but it is also possible to reuse
+an externally-created file.  In the latter case, you should ensure that
+the new image file has the same contents as the current one; QEMU cannot
+perform any meaningful check.  Typically this is achieved by using the
+current image file as the backing file for the new image.
+
 Arguments:
 
 actions array:
@@ -715,6 +722,8 @@ actions array:
       - "device": device name to snapshot (json-string)
       - "snapshot-file": name of new image file (json-string)
       - "format": format of new image (json-string, optional)
+      - "mode": whether and how QEMU should create the snapshot file
+        (NewImageMode, optional, default "absolute-paths")
 
 Example:
 
@@ -725,6 +734,7 @@ Example:
                                          "format": "qcow2" } },
          { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1",
                                          "snapshot-file": "/some/place/my-image2",
+                                         "mode": "existing",
                                          "format": "qcow2" } } ] } }
 <- { "return": {} }
 
-- 
cgit v1.1


From 6cc2a4157b31c47303da96c5ed7836db3c10def6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Mar 2012 18:55:59 +0100
Subject: qmp: convert blockdev-snapshot-sync to a wrapper around transactions

Simplify the blockdev-snapshot-sync code and gain failsafe operation
by turning it into a wrapper around the new transaction command.  A new
option is also added matching "mode".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c       | 85 +++++++++++++++-----------------------------------------
 hmp-commands.hx  |  9 ++++--
 hmp.c            |  6 +++-
 qapi-schema.json | 15 +++++-----
 qmp-commands.hx  |  2 ++
 5 files changed, 44 insertions(+), 73 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0a6edc3..1a500b8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -649,72 +649,33 @@ void do_commit(Monitor *mon, const QDict *qdict)
     }
 }
 
+static void blockdev_do_action(int kind, void *data, Error **errp)
+{
+    BlockdevAction action;
+    BlockdevActionList list;
+
+    action.kind = kind;
+    action.data = data;
+    list.value = &action;
+    list.next = NULL;
+    qmp_transaction(&list, errp);
+}
+
 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
                                 bool has_format, const char *format,
+                                bool has_mode, enum NewImageMode mode,
                                 Error **errp)
 {
-    BlockDriverState *bs;
-    BlockDriver *drv, *old_drv, *proto_drv;
-    int ret = 0;
-    int flags;
-    char old_filename[1024];
-
-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
-        return;
-    }
-    if (bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, device);
-        return;
-    }
-
-    pstrcpy(old_filename, sizeof(old_filename), bs->filename);
-
-    old_drv = bs->drv;
-    flags = bs->open_flags;
-
-    if (!has_format) {
-        format = "qcow2";
-    }
-
-    drv = bdrv_find_format(format);
-    if (!drv) {
-        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-        return;
-    }
-
-    proto_drv = bdrv_find_protocol(snapshot_file);
-    if (!proto_drv) {
-        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-        return;
-    }
-
-    ret = bdrv_img_create(snapshot_file, format, bs->filename,
-                          bs->drv->format_name, NULL, -1, flags);
-    if (ret) {
-        error_set(errp, QERR_UNDEFINED_ERROR);
-        return;
-    }
-
-    bdrv_drain_all();
-    bdrv_flush(bs);
-
-    bdrv_close(bs);
-    ret = bdrv_open(bs, snapshot_file, flags, drv);
-    /*
-     * If reopening the image file we just created fails, fall back
-     * and try to re-open the original image. If that fails too, we
-     * are in serious trouble.
-     */
-    if (ret != 0) {
-        ret = bdrv_open(bs, old_filename, flags, old_drv);
-        if (ret != 0) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
-        } else {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
-        }
-    }
+    BlockdevSnapshot snapshot = {
+        .device = (char *) device,
+        .snapshot_file = (char *) snapshot_file,
+        .has_format = has_format,
+        .format = (char *) format,
+        .has_mode = has_mode,
+        .mode = mode,
+    };
+    blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
+                       errp);
 }
 
 
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed88877..6980214 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -882,14 +882,17 @@ ETEXI
 
     {
         .name       = "snapshot_blkdev",
-        .args_type  = "device:B,snapshot-file:s?,format:s?",
-        .params     = "device [new-image-file] [format]",
+        .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
+        .params     = "[-n] device [new-image-file] [format]",
         .help       = "initiates a live snapshot\n\t\t\t"
                       "of device. If a new image file is specified, the\n\t\t\t"
                       "new image file will become the new root image.\n\t\t\t"
                       "If format is specified, the snapshot file will\n\t\t\t"
                       "be created in that format. Otherwise the\n\t\t\t"
-                      "snapshot will be internal! (currently unsupported)",
+                      "snapshot will be internal! (currently unsupported).\n\t\t\t"
+                      "The default format is qcow2.  The -n flag requests QEMU\n\t\t\t"
+                      "to reuse the image found in new-image-file, instead of\n\t\t\t"
+                      "recreating it from scratch.",
         .mhandler.cmd = hmp_snapshot_blkdev,
     },
 
diff --git a/hmp.c b/hmp.c
index 3a54455..290c43d 100644
--- a/hmp.c
+++ b/hmp.c
@@ -692,6 +692,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
     const char *device = qdict_get_str(qdict, "device");
     const char *filename = qdict_get_try_str(qdict, "snapshot-file");
     const char *format = qdict_get_try_str(qdict, "format");
+    int reuse = qdict_get_try_bool(qdict, "reuse", 0);
+    enum NewImageMode mode;
     Error *errp = NULL;
 
     if (!filename) {
@@ -702,7 +704,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp);
+    mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    qmp_blockdev_snapshot_sync(device, filename, !!format, format,
+                               true, mode, &errp);
     hmp_handle_error(mon, &errp);
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 0882f43..4df6b8f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1141,6 +1141,9 @@
 # @snapshot-file: the target of the new image. A new file will be created.
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
 ##
 { 'type': 'BlockdevSnapshot',
   'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
@@ -1197,21 +1200,19 @@
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+#
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
 #          If @snapshot-file can't be opened, OpenFileFailed
 #          If @format is invalid, InvalidBlockFormat
 #
-# Notes: One of the last steps taken by this command is to close the current
-#        image being used by @device and open the @snapshot-file one. If that
-#        fails, the command will try to reopen the original image file. If
-#        that also fails OpenFileFailed will be returned and the guest may get
-#        unexpected errors.
-#
 # Since 0.14.0
 ##
 { 'command': 'blockdev-snapshot-sync',
-  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
+            '*mode': 'NewImageMode'} }
 
 ##
 # @human-monitor-command:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 7c03b62..dfe8a5b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -760,6 +760,8 @@ Arguments:
 
 - "device": device name to snapshot (json-string)
 - "snapshot-file": name of new image file (json-string)
+- "mode": whether and how QEMU should create the snapshot file
+  (NewImageMode, optional, default "absolute-paths")
 - "format": format of new image (json-string, optional)
 
 Example:
-- 
cgit v1.1


From bf319ece56bc07608bfdf46b8ef5c61b52be83f6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 2 Mar 2012 19:27:53 +0100
Subject: qcow2: Factor out count_cow_clusters

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2-cluster.c | 55 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index a791bbe..903454d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -677,6 +677,41 @@ err:
  }
 
 /*
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but require COW to be performed (this includes yet unallocated space,
+ * which must copy from the backing file)
+ */
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+    uint64_t *l2_table, int l2_index)
+{
+    int i = 0;
+    uint64_t cluster_offset;
+
+    while (i < nb_clusters) {
+        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
+                &l2_table[l2_index], i, 0);
+        if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
+            break;
+        }
+
+        i += count_contiguous_free_clusters(nb_clusters - i,
+                &l2_table[l2_index + i]);
+        if (i >= nb_clusters) {
+            break;
+        }
+
+        cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
+
+        if ((cluster_offset & QCOW_OFLAG_COPIED) ||
+                (cluster_offset & QCOW_OFLAG_COMPRESSED))
+            break;
+    }
+
+    assert(i <= nb_clusters);
+    return i;
+}
+
+/*
  * alloc_cluster_offset
  *
  * For a given offset of the disk image, return cluster offset in qcow2 file.
@@ -739,25 +774,7 @@ again:
 
     /* how many available clusters ? */
 
-    while (i < nb_clusters) {
-        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
-                &l2_table[l2_index], i, 0);
-        if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
-            break;
-        }
-
-        i += count_contiguous_free_clusters(nb_clusters - i,
-                &l2_table[l2_index + i]);
-        if (i >= nb_clusters) {
-            break;
-        }
-
-        cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
-
-        if ((cluster_offset & QCOW_OFLAG_COPIED) ||
-                (cluster_offset & QCOW_OFLAG_COMPRESSED))
-            break;
-    }
+    i = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
     assert(i <= nb_clusters);
     nb_clusters = i;
 
-- 
cgit v1.1


From 256900b16b0264af9e165bceabbf74dcece4ea38 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 2 Mar 2012 19:35:58 +0100
Subject: qcow2: Add qcow2_alloc_clusters_at()

This function allows to allocate clusters at a given offset in the image
file. This is useful if you want to allocate the second part of an area
that must be contiguous.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2-refcount.c | 28 ++++++++++++++++++++++++++++
 block/qcow2.h          |  2 ++
 2 files changed, 30 insertions(+)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2db2ede..f39928a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
     return offset;
 }
 
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t cluster_index;
+    int i, refcount, ret;
+
+    /* Check how many clusters there are free */
+    cluster_index = offset >> s->cluster_bits;
+    for(i = 0; i < nb_clusters; i++) {
+        refcount = get_refcount(bs, cluster_index++);
+
+        if (refcount < 0) {
+            return refcount;
+        } else if (refcount != 0) {
+            break;
+        }
+    }
+
+    /* And then allocate them */
+    ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return i;
+}
+
 /* only used to allocate compressed sectors. We try to allocate
    contiguous sectors. size must be <= cluster_size */
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
diff --git a/block/qcow2.h b/block/qcow2.h
index fc35838..5129e3e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -193,6 +193,8 @@ int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters);
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
 void qcow2_free_clusters(BlockDriverState *bs,
     int64_t offset, int64_t size);
-- 
cgit v1.1


From 250196f19c6e7df12965d74a5073e10aba06c802 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 2 Mar 2012 14:10:54 +0100
Subject: qcow2: Reduce number of I/O requests

If the first part of a write request is allocated, but the second isn't
and it can be allocated so that the resulting area is contiguous, handle
it at once. This is a common case for sequential writes.

After this patch, alloc_cluster_offset() only checks if the clusters are
already allocated or how many new clusters can be allocated contigouosly.
The actual cluster allocation is split off into a new function
do_alloc_cluster_offset().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2-cluster.c | 243 ++++++++++++++++++++++++++++++++++----------------
 block/qcow2.h         |   1 +
 trace-events          |   1 +
 3 files changed, 168 insertions(+), 77 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 903454d..e0fb907 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -589,7 +589,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     BDRVQcowState *s = bs->opaque;
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
-    uint64_t cluster_offset = m->cluster_offset;
+    uint64_t cluster_offset = m->alloc_offset;
     bool cow = false;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
@@ -712,12 +712,94 @@ static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
 }
 
 /*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t cluster_offset;
+    QCowL2Meta *old_alloc;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
+
+    /*
+     * Check if there already is an AIO write request in flight which allocates
+     * the same cluster. In this case we need to wait until the previous
+     * request has completed and updated the L2 table accordingly.
+     */
+    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+        uint64_t start = guest_offset >> s->cluster_bits;
+        uint64_t end = start + *nb_clusters;
+        uint64_t old_start = old_alloc->offset >> s->cluster_bits;
+        uint64_t old_end = old_start + old_alloc->nb_clusters;
+
+        if (end < old_start || start > old_end) {
+            /* No intersection */
+        } else {
+            if (start < old_start) {
+                /* Stop at the start of a running allocation */
+                *nb_clusters = old_start - start;
+            } else {
+                *nb_clusters = 0;
+            }
+
+            if (*nb_clusters == 0) {
+                /* Wait for the dependency to complete. We need to recheck
+                 * the free/allocated clusters when we continue. */
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
+                return -EAGAIN;
+            }
+        }
+    }
+
+    if (!*nb_clusters) {
+        abort();
+    }
+
+    /* Allocate new clusters */
+    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+    if (*host_offset == 0) {
+        cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+    } else {
+        cluster_offset = *host_offset;
+        *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters);
+    }
+
+    if (cluster_offset < 0) {
+        return cluster_offset;
+    }
+    *host_offset = cluster_offset;
+    return 0;
+}
+
+/*
  * alloc_cluster_offset
  *
- * For a given offset of the disk image, return cluster offset in qcow2 file.
- * If the offset is not found, allocate a new cluster.
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
  *
- * If the cluster was already allocated, m->nb_clusters is set to 0,
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
  * other fields in m are meaningless.
  *
  * If the cluster is newly allocated, m->nb_clusters is set to the number of
@@ -734,119 +816,126 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int n_start, int n_end, int *num, QCowL2Meta *m)
 {
     BDRVQcowState *s = bs->opaque;
-    int l2_index, ret;
+    int l2_index, ret, sectors;
     uint64_t l2_offset, *l2_table;
-    int64_t cluster_offset;
-    unsigned int nb_clusters, i = 0;
-    QCowL2Meta *old_alloc;
+    unsigned int nb_clusters, keep_clusters;
+    uint64_t cluster_offset;
 
     trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
                                       n_start, n_end);
 
+    /* Find L2 entry for the first involved cluster */
     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
+    /*
+     * Calculate the number of clusters to look for. We stop at L2 table
+     * boundaries to keep things simple.
+     */
 again:
-    nb_clusters = size_to_clusters(s, n_end << 9);
-
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
+                      s->l2_size - l2_index);
 
     cluster_offset = be64_to_cpu(l2_table[l2_index]);
 
-    /* We keep all QCOW_OFLAG_COPIED clusters */
-
+    /*
+     * Check how many clusters are already allocated and don't need COW, and how
+     * many need a new allocation.
+     */
     if (cluster_offset & QCOW_OFLAG_COPIED) {
-        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0, 0);
-
-        cluster_offset &= ~QCOW_OFLAG_COPIED;
-        m->nb_clusters = 0;
+        /* We keep all QCOW_OFLAG_COPIED clusters */
+        keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                                                  &l2_table[l2_index], 0, 0);
+        assert(keep_clusters <= nb_clusters);
+        nb_clusters -= keep_clusters;
+    } else {
+        /* For the moment, overwrite compressed clusters one by one */
+        if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            nb_clusters = 1;
+        } else {
+            nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+        }
 
-        goto out;
+        keep_clusters = 0;
+        cluster_offset = 0;
     }
 
-    /* for the moment, multiple compressed clusters are not managed */
+    cluster_offset &= ~QCOW_OFLAG_COPIED;
 
-    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
-        nb_clusters = 1;
+    /* If there is something left to allocate, do that now */
+    *m = (QCowL2Meta) {
+        .cluster_offset     = cluster_offset,
+        .nb_clusters        = 0,
+    };
+    qemu_co_queue_init(&m->dependent_requests);
 
-    /* how many available clusters ? */
+    if (nb_clusters > 0) {
+        uint64_t alloc_offset;
+        uint64_t alloc_cluster_offset;
+        uint64_t keep_bytes = keep_clusters * s->cluster_size;
 
-    i = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
-    assert(i <= nb_clusters);
-    nb_clusters = i;
+        /* Calculate start and size of allocation */
+        alloc_offset = offset + keep_bytes;
 
-    /*
-     * Check if there already is an AIO write request in flight which allocates
-     * the same cluster. In this case we need to wait until the previous
-     * request has completed and updated the L2 table accordingly.
-     */
-    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
-        uint64_t start = offset >> s->cluster_bits;
-        uint64_t end = start + nb_clusters;
-        uint64_t old_start = old_alloc->offset >> s->cluster_bits;
-        uint64_t old_end = old_start + old_alloc->nb_clusters;
-
-        if (end < old_start || start > old_end) {
-            /* No intersection */
+        if (keep_clusters == 0) {
+            alloc_cluster_offset = 0;
         } else {
-            if (start < old_start) {
-                /* Stop at the start of a running allocation */
-                nb_clusters = old_start - start;
-            } else {
-                nb_clusters = 0;
-            }
-
-            if (nb_clusters == 0) {
-                /* Wait for the dependency to complete. We need to recheck
-                 * the free/allocated clusters when we continue. */
-                qemu_co_mutex_unlock(&s->lock);
-                qemu_co_queue_wait(&old_alloc->dependent_requests);
-                qemu_co_mutex_lock(&s->lock);
-                goto again;
-            }
+            alloc_cluster_offset = cluster_offset + keep_bytes;
         }
-    }
 
-    if (!nb_clusters) {
-        abort();
-    }
-
-    /* save info needed for meta data update */
-    m->offset = offset;
-    m->n_start = n_start;
-    m->nb_clusters = nb_clusters;
-
-    QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+        /* Allocate, if necessary at a given offset in the image file */
+        ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
+                                      &nb_clusters, l2_table);
+        if (ret == -EAGAIN) {
+            goto again;
+        } else if (ret < 0) {
+            goto fail;
+        }
 
-    /* allocate a new cluster */
-    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
-    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
-    if (cluster_offset < 0) {
-        ret = cluster_offset;
-        goto fail;
+        /* save info needed for meta data update */
+        if (nb_clusters > 0) {
+            int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
+            int avail_sectors = (keep_clusters + nb_clusters)
+                                << (s->cluster_bits - BDRV_SECTOR_BITS);
+
+            *m = (QCowL2Meta) {
+                .cluster_offset = keep_clusters == 0 ?
+                                  alloc_cluster_offset : cluster_offset,
+                .alloc_offset   = alloc_cluster_offset,
+                .offset         = alloc_offset,
+                .n_start        = keep_clusters == 0 ? n_start : 0,
+                .nb_clusters    = nb_clusters,
+                .nb_available   = MIN(requested_sectors, avail_sectors),
+            };
+            qemu_co_queue_init(&m->dependent_requests);
+            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+        }
     }
 
-out:
+    /* Some cleanup work */
     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
         goto fail_put;
     }
 
-    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
-    m->cluster_offset = cluster_offset;
+    sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
+    if (sectors > n_end) {
+        sectors = n_end;
+    }
 
-    *num = m->nb_available - n_start;
+    assert(sectors > n_start);
+    *num = sectors - n_start;
 
     return 0;
 
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 fail_put:
-    QLIST_REMOVE(m, next_in_flight);
+    if (nb_clusters > 0) {
+        QLIST_REMOVE(m, next_in_flight);
+    }
     return ret;
 }
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 5129e3e..e4ac366 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -155,6 +155,7 @@ typedef struct QCowL2Meta
 {
     uint64_t offset;
     uint64_t cluster_offset;
+    uint64_t alloc_offset;
     int n_start;
     int nb_available;
     int nb_clusters;
diff --git a/trace-events b/trace-events
index d818ff1..606d903 100644
--- a/trace-events
+++ b/trace-events
@@ -320,6 +320,7 @@ qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
 
 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d"
+qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
 qcow2_cluster_alloc_phys(void *co) "co %p"
 qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
 
-- 
cgit v1.1


From 3194c8ceeba06c3b54621ea7afd6879bc50c2d1d Mon Sep 17 00:00:00 2001
From: Alex Barcelo <abarcelo@ac.upc.edu>
Date: Tue, 28 Feb 2012 12:25:49 +0100
Subject: coroutine: adding sigaltstack method (.c source)

This file is based in both coroutine-ucontext.c and
pth_mctx.c (from the GNU Portable Threads library).

The mechanism used to change stacks is the sigaltstack
function (variant 2 of the pth library).

v2: Some corrections. Moving global variables into
thread storage (CoroutineThreadState).

Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 coroutine-sigaltstack.c | 334 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 334 insertions(+)
 create mode 100644 coroutine-sigaltstack.c

diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
new file mode 100644
index 0000000..7ff2d33
--- /dev/null
+++ b/coroutine-sigaltstack.c
@@ -0,0 +1,334 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+enum {
+    /* Maximum free pool size prevents holding too many freed coroutines */
+    POOL_MAX_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    jmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+
+    /** Information for the signal handler (trampoline) */
+    jmp_buf tr_reenter;
+    volatile sig_atomic_t tr_called;
+    void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
+        g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
+        g_free(co);
+    }
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!setjmp(self->env)) {
+        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineUContext *self;
+    Coroutine *co;
+    CoroutineThreadState *coTS;
+
+    /* Get the thread specific information */
+    coTS = coroutine_get_thread_state();
+    self = coTS->tr_handler;
+    coTS->tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a longjmp here again.
+     */
+    if (!setjmp(coTS->tr_reenter)) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has longjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+static Coroutine *coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    CoroutineThreadState *coTS;
+    struct sigaction sa;
+    struct sigaction osa;
+    struct sigaltstack ss;
+    struct sigaltstack oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    jmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put setjmp/longjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    coTS = coroutine_get_thread_state();
+    coTS->tr_handler = co;
+
+    /*
+     * Preserve the SIGUSR2 signal state, block SIGUSR2,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR2);
+    pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the setjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    coTS->tr_called = 0;
+    kill(getpid(), SIGUSR2);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR2);
+    while (!coTS->tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR2 signal handler and mask
+     */
+    sigaction(SIGUSR2, &osa, NULL);
+    pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmentic is neccessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!setjmp(old_env)) {
+        longjmp(coTS->tr_reenter, 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    Coroutine *co;
+
+    co = QSLIST_FIRST(&pool);
+    if (co) {
+        QSLIST_REMOVE_HEAD(&pool, pool_next);
+        pool_size--;
+    } else {
+        co = coroutine_new();
+    }
+    return co;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    if (pool_size < POOL_MAX_SIZE) {
+        QSLIST_INSERT_HEAD(&pool, &co->base, pool_next);
+        co->base.caller = NULL;
+        pool_size++;
+        return;
+    }
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = setjmp(from->env);
+    if (ret == 0) {
+        longjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+
-- 
cgit v1.1


From 519175a2fdfd3766f99bc29f0b199e23d57a3dd2 Mon Sep 17 00:00:00 2001
From: Alex Barcelo <abarcelo@ac.upc.edu>
Date: Tue, 28 Feb 2012 12:25:50 +0100
Subject: coroutine: adding configure choose mechanism for coroutine backend

Configure tries, as a default, ucontext functions for the
coroutines. But now the user can force another backend by
--with-coroutine=BACKEND option

v2: Using --with-coroutine=BACKEND instead of enable
disable individual configure options

Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 configure | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/configure b/configure
index 39d2b54..bb80822 100755
--- a/configure
+++ b/configure
@@ -194,6 +194,7 @@ opengl=""
 zlib="yes"
 guest_agent="yes"
 libiscsi=""
+coroutine=""
 
 # parse CC options first
 for opt do
@@ -784,6 +785,8 @@ for opt do
   ;;
   --with-pkgversion=*) pkgversion=" ($optarg)"
   ;;
+  --with-coroutine=*) coroutine="$optarg"
+  ;;
   --disable-docs) docs="no"
   ;;
   --enable-docs) docs="yes"
@@ -1110,6 +1113,8 @@ echo "  --disable-usb-redir      disable usb network redirection support"
 echo "  --enable-usb-redir       enable usb network redirection support"
 echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
 echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
+echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
+echo "                           gthread, ucontext, windows"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2715,21 +2720,36 @@ EOF
 fi
 
 ##########################################
-# check if we have makecontext
-# (and that it's not a glibc stub which always returns -1)
+# check and set a backend for coroutine
 
-ucontext_coroutine=no
-if test "$darwin" != "yes"; then
-  cat > $TMPC << EOF
+# default is ucontext, but always fallback to gthread
+# windows autodetected by make
+if test "$coroutine" = "" -o "$coroutine" = "ucontext"; then
+  if test "$darwin" != "yes"; then
+    cat > $TMPC << EOF
 #include <ucontext.h>
 #ifdef __stub_makecontext
 #error Ignoring glibc stub makecontext which will always fail
 #endif
 int main(void) { makecontext(0, 0, 0); return 0; }
 EOF
-  if compile_prog "" "" ; then
-      ucontext_coroutine=yes
+    if compile_prog "" "" ; then
+        coroutine_backend=ucontext
+    else
+	coroutine_backend=gthread
+    fi
+  else
+    echo "Silently falling back into gthread backend under darwin"
   fi
+elif test "$coroutine" = "gthread" ; then
+  coroutine_backend=gthread
+elif test "$coroutine" = "windows" ; then
+  coroutine_backend=windows
+else
+  echo
+  echo "Error: unknown coroutine backend $coroutine"
+  echo
+  exit 1
 fi
 
 ##########################################
@@ -2931,6 +2951,7 @@ echo "usb net redir     $usb_redir"
 echo "OpenGL support    $opengl"
 echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
+echo "coroutine backend $coroutine_backend"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3246,7 +3267,7 @@ if test "$rbd" = "yes" ; then
   echo "CONFIG_RBD=y" >> $config_host_mak
 fi
 
-if test "$ucontext_coroutine" = "yes" ; then
+if test "$coroutine_backend" = "ucontext" ; then
   echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
 fi
 
-- 
cgit v1.1


From fe91bfa8a26832cc07a6b74b8decfb687499caee Mon Sep 17 00:00:00 2001
From: Alex Barcelo <abarcelo@ac.upc.edu>
Date: Tue, 28 Feb 2012 12:25:51 +0100
Subject: coroutine: adding configure option for sigaltstack coroutine backend

It's possible to use sigaltstack backend with --with-coroutine=sigaltstack

v2: changed from enable/disable configure flags

Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs | 4 ++++
 configure     | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Makefile.objs b/Makefile.objs
index b39d76c..5f0b3f7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o
 ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
 coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
 else
+ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y)
+coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o
+else
 coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o
 endif
+endif
 coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 #######################################################################
diff --git a/configure b/configure
index bb80822..ddb3e39 100755
--- a/configure
+++ b/configure
@@ -1114,7 +1114,7 @@ echo "  --enable-usb-redir       enable usb network redirection support"
 echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
 echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
-echo "                           gthread, ucontext, windows"
+echo "                           gthread, ucontext, sigaltstack, windows"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2745,6 +2745,8 @@ elif test "$coroutine" = "gthread" ; then
   coroutine_backend=gthread
 elif test "$coroutine" = "windows" ; then
   coroutine_backend=windows
+elif test "$coroutine" = "sigaltstack" ; then
+  coroutine_backend=sigaltstack
 else
   echo
   echo "Error: unknown coroutine backend $coroutine"
@@ -3269,6 +3271,8 @@ fi
 
 if test "$coroutine_backend" = "ucontext" ; then
   echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
+elif test "$coroutine_backend" = "sigaltstack" ; then
+  echo "CONFIG_SIGALTSTACK_COROUTINE=y" >> $config_host_mak
 fi
 
 if test "$open_by_handle_at" = "yes" ; then
-- 
cgit v1.1


From 7e849a9919aac147a768a775014f2eff98e44323 Mon Sep 17 00:00:00 2001
From: Alex Barcelo <abarcelo@ac.upc.edu>
Date: Thu, 16 Feb 2012 13:14:06 +0100
Subject: test-coroutine: add performance test for nesting

The performance test will also check for nesting. It will do
a certain quantity of cycles, and each of one will do a depth
nesting process.

This is useful for benchmarking the creation of coroutines,
given that nesting is creation-intensive (and the other perf
test does not benchmark that).

Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 test-coroutine.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/test-coroutine.c b/test-coroutine.c
index bf9f3e9..e5d14eb 100644
--- a/test-coroutine.c
+++ b/test-coroutine.c
@@ -177,6 +177,32 @@ static void perf_lifecycle(void)
     g_test_message("Lifecycle %u iterations: %f s\n", max, duration);
 }
 
+static void perf_nesting(void)
+{
+    unsigned int i, maxcycles, maxnesting;
+    double duration;
+
+    maxcycles = 100000000;
+    maxnesting = 20000;
+    Coroutine *root;
+    NestData nd = {
+        .n_enter  = 0,
+        .n_return = 0,
+        .max      = maxnesting,
+    };
+
+    g_test_timer_start();
+    for (i = 0; i < maxcycles; i++) {
+        root = qemu_coroutine_create(nest);
+        qemu_coroutine_enter(root, &nd);
+    }
+    duration = g_test_timer_elapsed();
+
+    g_test_message("Nesting %u iterations of %u depth each: %f s\n",
+        maxcycles, maxnesting, duration);
+}
+
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -187,6 +213,7 @@ int main(int argc, char **argv)
     g_test_add_func("/basic/in_coroutine", test_in_coroutine);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
+        g_test_add_func("/perf/nesting", perf_nesting);
     }
     return g_test_run();
 }
-- 
cgit v1.1