98 files changed, 7577 insertions, 724 deletions
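The block-layer hunks below replace the fixed 1 MiB dirty-chunk bitmap with an HBitmap whose per-bit granularity is chosen by the caller of bdrv_set_dirty_tracking(). As a rough illustration of the arithmetic those hunks rely on, here is a minimal sketch; it assumes 512-byte sectors (BDRV_SECTOR_BITS == 9, as in QEMU's block.h), and the helper name is hypothetical, not part of the patch:

#include <assert.h>
#include <strings.h>   /* ffs() */

#define BDRV_SECTOR_BITS 9   /* 512-byte sectors */

/* Map a byte granularity to the hbitmap granularity level passed to
 * hbitmap_alloc(), mirroring "granularity >>= BDRV_SECTOR_BITS;
 * ... ffs(granularity) - 1" in the patch below. */
static int granularity_to_hbitmap_level(int granularity)
{
    assert(granularity >= 512);
    assert((granularity & (granularity - 1)) == 0);   /* power of two */
    return ffs(granularity >> BDRV_SECTOR_BITS) - 1;
}

For example, a 512-byte granularity yields level 0 (one bit per sector), and the 64 KiB upper bound used by mirroring yields level 7 (one bit per 128 sectors).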
diff --git a/block-migration.c b/block-migration.c index 6acf3e1..9ac7de6 100644 --- a/block-migration.c +++ b/block-migration.c @@ -23,7 +23,8 @@ #include "sysemu/blockdev.h" #include <assert.h> -#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS) +#define BLOCK_SIZE (1 << 20) +#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS) #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_EOS 0x02 @@ -254,7 +255,7 @@ static void set_dirty_tracking(int enable) BlkMigDevState *bmds; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - bdrv_set_dirty_tracking(bmds->bs, enable); + bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0); } } @@ -478,7 +479,7 @@ static int64_t get_remaining_dirty(void) dirty += bdrv_get_dirty_count(bmds->bs); } - return dirty * BLOCK_SIZE; + return dirty << BDRV_SECTOR_BITS; } static void blk_mig_cleanup(void) @@ -1286,7 +1286,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->iostatus = bs_src->iostatus; /* dirty bitmap */ - bs_dest->dirty_count = bs_src->dirty_count; bs_dest->dirty_bitmap = bs_src->dirty_bitmap; /* job */ @@ -1674,10 +1673,10 @@ static void tracked_request_begin(BdrvTrackedRequest *req, /** * Round a region to cluster boundaries */ -static void round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors) { BlockDriverInfo bdi; @@ -1719,8 +1718,8 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs, * CoR read and write operations are atomic and guest writes cannot * interleave between them. */ - round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, sector_num, nb_sectors, + &cluster_sector_num, &cluster_nb_sectors); do { retry = false; @@ -2035,36 +2034,6 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, return ret; } -#define BITS_PER_LONG (sizeof(unsigned long) * 8) - -static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int dirty) -{ - int64_t start, end; - unsigned long val, idx, bit; - - start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; - end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK; - - for (; start <= end; start++) { - idx = start / BITS_PER_LONG; - bit = start % BITS_PER_LONG; - val = bs->dirty_bitmap[idx]; - if (dirty) { - if (!(val & (1UL << bit))) { - bs->dirty_count++; - val |= 1UL << bit; - } - } else { - if (val & (1UL << bit)) { - bs->dirty_count--; - val &= ~(1UL << bit); - } - } - bs->dirty_bitmap[idx] = val; - } -} - /* Return < 0 if error. Important errors are: -EIO generic I/O error (may happen for all errors) -ENOMEDIUM No media inserted. @@ -2216,8 +2185,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. 
*/ - round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, sector_num, nb_sectors, + &cluster_sector_num, &cluster_nb_sectors); trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, cluster_sector_num, cluster_nb_sectors); @@ -2863,8 +2832,9 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs) if (bs->dirty_bitmap) { info->has_dirty = true; info->dirty = g_malloc0(sizeof(*info->dirty)); - info->dirty->count = bdrv_get_dirty_count(bs) * - BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE; + info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE; + info->dirty->granularity = + ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap)); } if (bs->drv) { @@ -4173,7 +4143,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 0); + bdrv_reset_dirty(bs, sector_num, nb_sectors); } if (bs->drv->bdrv_co_discard) { @@ -4331,22 +4301,20 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) +void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity) { int64_t bitmap_size; - bs->dirty_count = 0; - if (enable) { - if (!bs->dirty_bitmap) { - bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + - BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1; - bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; + assert((granularity & (granularity - 1)) == 0); - bs->dirty_bitmap = g_new0(unsigned long, bitmap_size); - } + if (granularity) { + granularity >>= BDRV_SECTOR_BITS; + assert(!bs->dirty_bitmap); + bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); } else { if (bs->dirty_bitmap) { - g_free(bs->dirty_bitmap); + hbitmap_free(bs->dirty_bitmap); bs->dirty_bitmap = NULL; } } @@ -4354,67 +4322,37 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) { - int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; - - if (bs->dirty_bitmap && - (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { - return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] & - (1UL << (chunk % BITS_PER_LONG))); + if (bs->dirty_bitmap) { + return hbitmap_get(bs->dirty_bitmap, sector); } else { return 0; } } -int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector) +void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi) { - int64_t chunk; - int bit, elem; - - /* Avoid an infinite loop. 
*/ - assert(bs->dirty_count > 0); - - sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; - chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; - - QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG); - elem = chunk / BITS_PER_LONG; - bit = chunk % BITS_PER_LONG; - for (;;) { - if (sector >= bs->total_sectors) { - sector = 0; - bit = elem = 0; - } - if (bit == 0 && bs->dirty_bitmap[elem] == 0) { - sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; - elem++; - } else { - if (bs->dirty_bitmap[elem] & (1UL << bit)) { - return sector; - } - sector += BDRV_SECTORS_PER_DIRTY_CHUNK; - if (++bit == BITS_PER_LONG) { - bit = 0; - elem++; - } - } - } + hbitmap_iter_init(hbi, bs->dirty_bitmap, 0); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - set_dirty_bitmap(bs, cur_sector, nr_sectors, 1); + hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors); } void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); + hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors); } int64_t bdrv_get_dirty_count(BlockDriverState *bs) { - return bs->dirty_count; + if (bs->dirty_bitmap) { + return hbitmap_count(bs->dirty_bitmap); + } else { + return 0; + } } void bdrv_set_in_use(BlockDriverState *bs, int in_use) diff --git a/block/bochs.c b/block/bochs.c index 1b1d9cd..3737583 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -126,7 +126,7 @@ static int bochs_open(BlockDriverState *bs, int flags) strcmp(bochs.subtype, GROWING_TYPE) || ((le32_to_cpu(bochs.version) != HEADER_VERSION) && (le32_to_cpu(bochs.version) != HEADER_V1))) { - goto fail; + return -EMEDIUMTYPE; } if (le32_to_cpu(bochs.version) == HEADER_V1) { diff --git a/block/cow.c b/block/cow.c index a33ce95..4baf904 100644 --- a/block/cow.c +++ b/block/cow.c @@ -73,7 +73,7 @@ static int cow_open(BlockDriverState *bs, int flags) } if (be32_to_cpu(cow_header.magic) != COW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } diff --git a/block/mirror.c b/block/mirror.c index 6180aa3..a62ad86 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -15,17 +15,17 @@ #include "block/blockjob.h" #include "block/block_int.h" #include "qemu/ratelimit.h" +#include "qemu/bitmap.h" -enum { - /* - * Size of data buffer for populating the image file. This should be large - * enough to process multiple clusters in a single call, so that populating - * contiguous regions of the image is efficient. - */ - BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ -}; +#define SLICE_TIME 100000000ULL /* ns */ +#define MAX_IN_FLIGHT 16 -#define SLICE_TIME 100000000ULL /* ns */ +/* The mirroring buffer is a list of granularity-sized chunks. + * Free chunks are organized in a list. 
+ */ +typedef struct MirrorBuffer { + QSIMPLEQ_ENTRY(MirrorBuffer) next; +} MirrorBuffer; typedef struct MirrorBlockJob { BlockJob common; @@ -36,9 +36,26 @@ typedef struct MirrorBlockJob { bool synced; bool should_complete; int64_t sector_num; + int64_t granularity; + size_t buf_size; + unsigned long *cow_bitmap; + HBitmapIter hbi; uint8_t *buf; + QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; + int buf_free_count; + + unsigned long *in_flight_bitmap; + int in_flight; + int ret; } MirrorBlockJob; +typedef struct MirrorOp { + MirrorBlockJob *s; + QEMUIOVector qiov; + int64_t sector_num; + int nb_sectors; +} MirrorOp; + static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, int error) { @@ -52,51 +69,234 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, } } -static int coroutine_fn mirror_iteration(MirrorBlockJob *s, - BlockErrorAction *p_action) +static void mirror_iteration_done(MirrorOp *op, int ret) { - BlockDriverState *source = s->common.bs; - BlockDriverState *target = s->target; - QEMUIOVector qiov; - int ret, nb_sectors; - int64_t end; - struct iovec iov; + MirrorBlockJob *s = op->s; + struct iovec *iov; + int64_t chunk_num; + int i, nb_chunks, sectors_per_chunk; + + trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); + + s->in_flight--; + iov = op->qiov.iov; + for (i = 0; i < op->qiov.niov; i++) { + MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next); + s->buf_free_count++; + } - end = s->common.len >> BDRV_SECTOR_BITS; - s->sector_num = bdrv_get_next_dirty(source, s->sector_num); - nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); - bdrv_reset_dirty(source, s->sector_num, nb_sectors); + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + chunk_num = op->sector_num / sectors_per_chunk; + nb_chunks = op->nb_sectors / sectors_per_chunk; + bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); + if (s->cow_bitmap && ret >= 0) { + bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); + } - /* Copy the dirty cluster. 
*/ - iov.iov_base = s->buf; - iov.iov_len = nb_sectors * 512; - qemu_iovec_init_external(&qiov, &iov, 1); + g_slice_free(MirrorOp, op); + qemu_coroutine_enter(s->common.co, NULL); +} - trace_mirror_one_iteration(s, s->sector_num, nb_sectors); - ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); +static void mirror_write_complete(void *opaque, int ret) +{ + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; if (ret < 0) { - *p_action = mirror_error_action(s, true, -ret); - goto fail; + BlockDriverState *source = s->common.bs; + BlockErrorAction action; + + bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + action = mirror_error_action(s, false, -ret); + if (action == BDRV_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } } - ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + mirror_iteration_done(op, ret); +} + +static void mirror_read_complete(void *opaque, int ret) +{ + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; if (ret < 0) { - *p_action = mirror_error_action(s, false, -ret); - s->synced = false; - goto fail; + BlockDriverState *source = s->common.bs; + BlockErrorAction action; + + bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + action = mirror_error_action(s, true, -ret); + if (action == BDRV_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } + + mirror_iteration_done(op, ret); + return; + } + bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors, + mirror_write_complete, op); +} + +static void coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->common.bs; + int nb_sectors, sectors_per_chunk, nb_chunks; + int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; + MirrorOp *op; + + s->sector_num = hbitmap_iter_next(&s->hbi); + if (s->sector_num < 0) { + bdrv_dirty_iter_init(source, &s->hbi); + s->sector_num = hbitmap_iter_next(&s->hbi); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(source)); + assert(s->sector_num >= 0); + } + + hbitmap_next_sector = s->sector_num; + sector_num = s->sector_num; + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + end = s->common.len >> BDRV_SECTOR_BITS; + + /* Extend the QEMUIOVector to include all adjacent blocks that will + * be copied in this operation. + * + * We have to do this if we have no backing file yet in the destination, + * and the cluster size is very large. Then we need to do COW ourselves. + * The first time a cluster is copied, copy it entirely. Note that, + * because both the granularity and the cluster size are powers of two, + * the number of sectors to copy cannot exceed one cluster. + * + * We also want to extend the QEMUIOVector to include more adjacent + * dirty blocks if possible, to limit the number of I/O operations and + * run efficiently even with a small granularity. + */ + nb_chunks = 0; + nb_sectors = 0; + next_sector = sector_num; + next_chunk = sector_num / sectors_per_chunk; + + /* Wait for I/O to this cluster (from a previous iteration) to be done. 
*/ + while (test_bit(next_chunk, s->in_flight_bitmap)) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + qemu_coroutine_yield(); + } + + do { + int added_sectors, added_chunks; + + if (!bdrv_get_dirty(source, next_sector) || + test_bit(next_chunk, s->in_flight_bitmap)) { + assert(nb_sectors > 0); + break; + } + + added_sectors = sectors_per_chunk; + if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) { + bdrv_round_to_clusters(s->target, + next_sector, added_sectors, + &next_sector, &added_sectors); + + /* On the first iteration, the rounding may make us copy + * sectors before the first dirty one. + */ + if (next_sector < sector_num) { + assert(nb_sectors == 0); + sector_num = next_sector; + next_chunk = next_sector / sectors_per_chunk; + } + } + + added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors)); + added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk; + + /* When doing COW, it may happen that there is not enough space for + * a full cluster. Wait if that is the case. + */ + while (nb_chunks == 0 && s->buf_free_count < added_chunks) { + trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight); + qemu_coroutine_yield(); + } + if (s->buf_free_count < nb_chunks + added_chunks) { + trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); + break; + } + + /* We have enough free space to copy these sectors. */ + bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks); + + nb_sectors += added_sectors; + nb_chunks += added_chunks; + next_sector += added_sectors; + next_chunk += added_chunks; + } while (next_sector < end); + + /* Allocate a MirrorOp that is used as an AIO callback. */ + op = g_slice_new(MirrorOp); + op->s = s; + op->sector_num = sector_num; + op->nb_sectors = nb_sectors; + + /* Now make a QEMUIOVector taking enough granularity-sized chunks + * from s->buf_free. + */ + qemu_iovec_init(&op->qiov, nb_chunks); + next_sector = sector_num; + while (nb_chunks-- > 0) { + MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); + QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); + s->buf_free_count--; + qemu_iovec_add(&op->qiov, buf, s->granularity); + + /* Advance the HBitmapIter in parallel, so that we do not examine + * the same sector twice. + */ + if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) { + hbitmap_next_sector = hbitmap_iter_next(&s->hbi); + } + + next_sector += sectors_per_chunk; } - return 0; -fail: - /* Try again later. */ - bdrv_set_dirty(source, s->sector_num, nb_sectors); - return ret; + bdrv_reset_dirty(source, sector_num, nb_sectors); + + /* Copy the dirty cluster. 
*/ + s->in_flight++; + trace_mirror_one_iteration(s, sector_num, nb_sectors); + bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, + mirror_read_complete, op); +} + +static void mirror_free_init(MirrorBlockJob *s) +{ + int granularity = s->granularity; + size_t buf_size = s->buf_size; + uint8_t *buf = s->buf; + + assert(s->buf_free_count == 0); + QSIMPLEQ_INIT(&s->buf_free); + while (buf_size != 0) { + MirrorBuffer *cur = (MirrorBuffer *)buf; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next); + s->buf_free_count++; + buf_size -= granularity; + buf += granularity; + } +} + +static void mirror_drain(MirrorBlockJob *s) +{ + while (s->in_flight > 0) { + qemu_coroutine_yield(); + } } static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; - int64_t sector_num, end; + int64_t sector_num, end, sectors_per_chunk, length; + uint64_t last_pause_ns; + BlockDriverInfo bdi; + char backing_filename[1024]; int ret = 0; int n; @@ -105,20 +305,39 @@ static void coroutine_fn mirror_run(void *opaque) } s->common.len = bdrv_getlength(bs); - if (s->common.len < 0) { + if (s->common.len <= 0) { block_job_completed(&s->common, s->common.len); return; } + length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; + s->in_flight_bitmap = bitmap_new(length); + + /* If we have no backing file yet in the destination, we cannot let + * the destination do COW. Instead, we copy sectors around the + * dirty data if needed. We need a bitmap to do that. + */ + bdrv_get_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + if (backing_filename[0] && !s->target->backing_hd) { + bdrv_get_info(s->target, &bdi); + if (s->granularity < bdi.cluster_size) { + s->buf_size = MAX(s->buf_size, bdi.cluster_size); + s->cow_bitmap = bitmap_new(length); + } + } + end = s->common.len >> BDRV_SECTOR_BITS; - s->buf = qemu_blockalign(bs, BLOCK_SIZE); + s->buf = qemu_blockalign(bs, s->buf_size); + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + mirror_free_init(s); if (s->mode != MIRROR_SYNC_MODE_NONE) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base; base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; for (sector_num = 0; sector_num < end; ) { - int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; ret = bdrv_co_is_allocated_above(bs, base, sector_num, next - sector_num, &n); @@ -136,24 +355,40 @@ static void coroutine_fn mirror_run(void *opaque) } } - s->sector_num = -1; + bdrv_dirty_iter_init(bs, &s->hbi); + last_pause_ns = qemu_get_clock_ns(rt_clock); for (;;) { uint64_t delay_ns; int64_t cnt; bool should_complete; + if (s->ret < 0) { + ret = s->ret; + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); - if (cnt != 0) { - BlockErrorAction action = BDRV_ACTION_REPORT; - ret = mirror_iteration(s, &action); - if (ret < 0 && action == BDRV_ACTION_REPORT) { - goto immediate_exit; + + /* Note that even when no rate limit is applied we need to yield + * periodically with no pending I/O so that qemu_aio_flush() returns. + * We do so every SLICE_TIME nanoseconds, or when there is an error, + * or when the source is clean, whichever comes first. 
+ */ + if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME && + s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); + qemu_coroutine_yield(); + continue; + } else if (cnt != 0) { + mirror_iteration(s); + continue; } - cnt = bdrv_get_dirty_count(bs); } should_complete = false; - if (cnt == 0) { + if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); ret = bdrv_flush(s->target); if (ret < 0) { @@ -196,23 +431,20 @@ static void coroutine_fn mirror_run(void *opaque) trace_mirror_before_sleep(s, cnt, s->synced); if (!s->synced) { /* Publish progress */ - s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk); } else { delay_ns = 0; } - /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that bdrv_drain_all() returns. - */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } } else if (!should_complete) { - delay_ns = (cnt == 0 ? SLICE_TIME : 0); + delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0); block_job_sleep_ns(&s->common, rt_clock, delay_ns); } else if (cnt == 0) { /* The two disks are in sync. Exit and report successful @@ -222,11 +454,24 @@ static void coroutine_fn mirror_run(void *opaque) s->common.cancelled = false; break; } + last_pause_ns = qemu_get_clock_ns(rt_clock); } immediate_exit: + if (s->in_flight > 0) { + /* We get here only if something went wrong. Either the job failed, + * or it was cancelled prematurely so that we do not guarantee that + * the target is a copy of the source. + */ + assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common))); + mirror_drain(s); + } + + assert(s->in_flight == 0); qemu_vfree(s->buf); - bdrv_set_dirty_tracking(bs, false); + g_free(s->cow_bitmap); + g_free(s->in_flight_bitmap); + bdrv_set_dirty_tracking(bs, 0); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { @@ -288,14 +533,28 @@ static BlockJobType mirror_job_type = { }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { MirrorBlockJob *s; + if (granularity == 0) { + /* Choose the default granularity based on the target file's cluster + * size, clamped between 4k and 64k. 
*/ + BlockDriverInfo bdi; + if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + } + + assert ((granularity & (granularity - 1)) == 0); + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { @@ -312,7 +571,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->on_target_error = on_target_error; s->target = target; s->mode = mode; - bdrv_set_dirty_tracking(bs, true); + s->granularity = granularity; + s->buf_size = MAX(buf_size, granularity); + + bdrv_set_dirty_tracking(bs, granularity); bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/block/qcow.c b/block/qcow.c index 4276610..a7135ee 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -112,7 +112,7 @@ static int qcow_open(BlockDriverState *bs, int flags) be64_to_cpus(&header.l1_table_offset); if (header.magic != QCOW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } if (header.version != QCOW_VERSION) { diff --git a/block/qcow2.c b/block/qcow2.c index f6abff6..7610e56 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -311,7 +311,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) be32_to_cpus(&header.nb_snapshots); if (header.magic != QCOW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } if (header.version < 2 || header.version > 3) { diff --git a/block/qed.c b/block/qed.c index cf85d8f..b8515e5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -390,7 +390,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) qed_header_le_to_cpu(&le_header, &s->header); if (s->header.magic != QED_MAGIC) { - return -EINVAL; + return -EMEDIUMTYPE; } if (s->header.features & ~QED_FEATURE_MASK) { /* image uses unsupported feature bits */ diff --git a/block/vdi.c b/block/vdi.c index 021abaa..257a592 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -246,7 +246,7 @@ static void vdi_header_print(VdiHeader *header) { char uuid[37]; logout("text %s", header->text); - logout("signature 0x%04x\n", header->signature); + logout("signature 0x%08x\n", header->signature); logout("header size 0x%04x\n", header->header_size); logout("image type 0x%04x\n", header->image_type); logout("image flags 0x%04x\n", header->image_flags); @@ -369,10 +369,12 @@ static int vdi_open(BlockDriverState *bs, int flags) BDRVVdiState *s = bs->opaque; VdiHeader header; size_t bmap_size; + int ret; logout("\n"); - if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) { + ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + if (ret < 0) { goto fail; } @@ -390,33 +392,45 @@ static int vdi_open(BlockDriverState *bs, int flags) header.disk_size &= ~(SECTOR_SIZE - 1); } - if (header.version != VDI_VERSION_1_1) { + if (header.signature != VDI_SIGNATURE) { + logout("bad vdi signature %08x\n", header.signature); + ret = -EMEDIUMTYPE; + goto fail; + } else if (header.version != VDI_VERSION_1_1) { logout("unsupported version %u.%u\n", header.version >> 16, header.version & 0xffff); + ret = -ENOTSUP; goto fail; } else if (header.offset_bmap % SECTOR_SIZE != 0) { /* We only support block maps which start on a sector boundary. */ logout("unsupported block map offset 0x%x B\n", header.offset_bmap); + ret = -ENOTSUP; goto fail; } else if (header.offset_data % SECTOR_SIZE != 0) { /* We only support data blocks which start on a sector boundary. 
*/ logout("unsupported data offset 0x%x B\n", header.offset_data); + ret = -ENOTSUP; goto fail; } else if (header.sector_size != SECTOR_SIZE) { logout("unsupported sector size %u B\n", header.sector_size); + ret = -ENOTSUP; goto fail; } else if (header.block_size != 1 * MiB) { logout("unsupported block size %u B\n", header.block_size); + ret = -ENOTSUP; goto fail; } else if (header.disk_size > (uint64_t)header.blocks_in_image * header.block_size) { logout("unsupported disk size %" PRIu64 " B\n", header.disk_size); + ret = -ENOTSUP; goto fail; } else if (!uuid_is_null(header.uuid_link)) { logout("link uuid != 0, unsupported\n"); + ret = -ENOTSUP; goto fail; } else if (!uuid_is_null(header.uuid_parent)) { logout("parent uuid != 0, unsupported\n"); + ret = -ENOTSUP; goto fail; } @@ -432,7 +446,8 @@ static int vdi_open(BlockDriverState *bs, int flags) if (bmap_size > 0) { s->bmap = g_malloc(bmap_size * SECTOR_SIZE); } - if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) { + ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size); + if (ret < 0) { goto fail_free_bmap; } @@ -448,7 +463,7 @@ static int vdi_open(BlockDriverState *bs, int flags) g_free(s->bmap); fail: - return -1; + return ret; } static int vdi_reopen_prepare(BDRVReopenState *state, diff --git a/block/vmdk.c b/block/vmdk.c index 19298c2..8333afb 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -616,7 +616,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, return vmdk_open_vmdk4(bs, file, flags); break; default: - return -EINVAL; + return -EMEDIUMTYPE; break; } } @@ -718,7 +718,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, } buf[2047] = '\0'; if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { - return -EINVAL; + return -EMEDIUMTYPE; } if (strcmp(ct, "monolithicFlat") && strcmp(ct, "twoGbMaxExtentSparse") && @@ -617,8 +617,13 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type) ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv); if (ret < 0) { - error_report("could not open disk image %s: %s", - file, strerror(-ret)); + if (ret == -EMEDIUMTYPE) { + error_report("could not open disk image %s: not in %s format", + file, drv->format_name); + } else { + error_report("could not open disk image %s: %s", + file, strerror(-ret)); + } goto err; } @@ -642,21 +647,17 @@ void do_commit(Monitor *mon, const QDict *qdict) if (!strcmp(device, "all")) { ret = bdrv_commit_all(); - if (ret == -EBUSY) { - qerror_report(QERR_DEVICE_IN_USE, device); - return; - } } else { bs = bdrv_find(device); if (!bs) { - qerror_report(QERR_DEVICE_NOT_FOUND, device); + monitor_printf(mon, "Device '%s' not found\n", device); return; } ret = bdrv_commit(bs); - if (ret == -EBUSY) { - qerror_report(QERR_DEVICE_IN_USE, device); - return; - } + } + if (ret < 0) { + monitor_printf(mon, "'commit' error for '%s': %s\n", device, + strerror(-ret)); } } @@ -1188,16 +1189,19 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +#define DEFAULT_MIRROR_BUF_SIZE (10 << 20) + void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) { - BlockDriverInfo bdi; BlockDriverState *bs; BlockDriverState 
*source, *target_bs; BlockDriver *proto_drv; @@ -1219,6 +1223,21 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_mode) { mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } + if (!has_granularity) { + granularity = 0; + } + if (!has_buf_size) { + buf_size = DEFAULT_MIRROR_BUF_SIZE; + } + + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } + if (granularity & (granularity - 1)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } bs = bdrv_find(device); if (!bs) { @@ -1259,11 +1278,11 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + bdrv_get_geometry(bs, &size); + size *= 512; if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { /* create new image w/o backing file */ assert(format && drv); - bdrv_get_geometry(bs, &size); - size *= 512; bdrv_img_create(target, format, NULL, NULL, NULL, size, flags, &local_err); } else { @@ -1276,7 +1295,7 @@ void qmp_drive_mirror(const char *device, const char *target, bdrv_img_create(target, format, source->filename, source->drv->format_name, - NULL, -1, flags, &local_err); + NULL, size, flags, &local_err); break; default: abort(); @@ -1288,6 +1307,9 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + /* Mirroring takes care of copy-on-write using the source's backing + * file. + */ target_bs = bdrv_new(""); ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv); @@ -1297,18 +1319,8 @@ void qmp_drive_mirror(const char *device, const char *target, return; } - /* We need a backing file if we will copy parts of a cluster. */ - if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 && - bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) { - ret = bdrv_open_backing_file(target_bs); - if (ret < 0) { - bdrv_delete(target_bs); - error_set(errp, QERR_OPEN_FILE_FAILED, target); - return; - } - } - - mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + mirror_start(bs, target_bs, speed, granularity, buf_size, sync, + on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); @@ -517,7 +517,7 @@ static void qemu_init_sigbus(void) prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } -static void qemu_kvm_eat_signals(CPUArchState *env) +static void qemu_kvm_eat_signals(CPUState *cpu) { struct timespec ts = { 0, 0 }; siginfo_t siginfo; @@ -538,7 +538,7 @@ static void qemu_kvm_eat_signals(CPUArchState *env) switch (r) { case SIGBUS: - if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { + if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { sigbus_reraise(); } break; @@ -560,7 +560,7 @@ static void qemu_init_sigbus(void) { } -static void qemu_kvm_eat_signals(CPUArchState *env) +static void qemu_kvm_eat_signals(CPUState *cpu) { } #endif /* !CONFIG_LINUX */ @@ -727,7 +727,7 @@ static void qemu_kvm_wait_io_event(CPUArchState *env) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - qemu_kvm_eat_signals(env); + qemu_kvm_eat_signals(cpu); qemu_wait_io_event_common(cpu); } diff --git a/docs/virtio-balloon-stats.txt b/docs/virtio-balloon-stats.txt new file mode 100644 index 0000000..f74612f --- /dev/null +++ b/docs/virtio-balloon-stats.txt @@ -0,0 +1,104 @@ +virtio balloon memory statistics +================================ + +The virtio balloon driver supports guest memory statistics reporting. 
These +statistics are available to QEMU users as QOM (QEMU Object Model) device +properties via a polling mechanism. + +Before querying the available stats, clients first have to enable polling. +This is done by writing a time interval value (in seconds) to the +guest-stats-polling-interval property. This value can be: + + > 0 enables polling at the specified interval. If polling is already + enabled, the polling time interval is changed to the new value + + 0 disables polling. Previously polled statistics are still valid and + can be queried. + +Once polling is enabled, the virtio-balloon device in QEMU will start +polling the guest's balloon driver for new stats in the specified time +interval. + +To retrieve those stats, clients have to query the guest-stats property, +which will return a dictionary containing: + + o A key named 'stats', containing all available stats. If the guest + doesn't support a particular stat, or if it couldn't be retrieved, + its value will be -1. Currently, the following stats are supported: + + - stat-swap-in + - stat-swap-out + - stat-major-faults + - stat-minor-faults + - stat-free-memory + - stat-total-memory + + o A key named 'last-update', which contains the last stats update + timestamp in seconds. Since this timestamp is generated by the host, + a buggy guest can't influence its value + +It's also important to note the following: + + - Previously polled statistics remain available even if the polling is + later disabled + + - As noted above, if a guest doesn't support a particular stat its value + will always be -1. However, it's also possible that a guest temporarily + couldn't update one or even all stats. If this happens, just wait for + the next update + + - Polling can be enabled even if the guest doesn't have stats support + or the balloon driver wasn't loaded in the guest. If this is the case + and stats are queried, an error will be returned + + - The polling timer is only re-armed when the guest responds to the + statistics request. This means that if a (buggy) guest never + responds to the request, the timer will never be re-armed, which has + the same effect as disabling polling + +Here are a few examples. QEMU is started with '-balloon virtio', which +generates '/machine/peripheral-anon/device[1]' as the QOM path for the +balloon device. + +Enable polling with a 2-second interval: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 2 } } + +{ "return": {} } + +Change the polling interval to 10 seconds: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 10 } } + +{ "return": {} } + +Get stats: + +{ "execute": "qom-get", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats" } } +{ + "return": { + "stats": { + "stat-swap-out": 0, + "stat-free-memory": 844943360, + "stat-minor-faults": 219028, + "stat-major-faults": 235, + "stat-total-memory": 1044406272, + "stat-swap-in": 0 + }, + "last-update": 1358529861 + } +} + +Disable polling: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 0 } } + +{ "return": {} } diff --git a/hmp-commands.hx b/hmp-commands.hx index 0934b9b..bdd48f3 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -837,6 +837,45 @@ STEXI @item nmi @var{cpu} @findex nmi Inject an NMI on the given CPU (x86 only). + +ETEXI
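The memchar_read documentation added below states that a request larger than the device's initialization size is adjusted down to it. A minimal self-contained sketch of that clamping rule follows; it models only the documented behaviour, all names are hypothetical, and it is not the CirMemCharDriver implementation:

#include <stddef.h>

/* Toy circular buffer illustrating the documented read semantics. */
typedef struct {
    char *buf;
    size_t capacity;   /* fixed at init time */
    size_t count;      /* bytes currently stored */
    size_t rpos;       /* read position */
} CirMem;

static size_t cirmem_read(CirMem *c, char *out, size_t want)
{
    size_t n = want;
    if (n > c->capacity) {
        n = c->capacity;   /* clamp to the init size, as documented */
    }
    if (n > c->count) {
        n = c->count;      /* and to what is actually stored */
    }
    for (size_t i = 0; i < n; i++) {
        out[i] = c->buf[(c->rpos + i) % c->capacity];
    }
    c->rpos = (c->rpos + n) % c->capacity;
    c->count -= n;
    return n;
}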
+ + { + .name = "memchar_write", + .args_type = "device:s,data:s", + .params = "device data", + .help = "Provide a write interface for CirMemCharDriver. Write " + "'data' to it.", + .mhandler.cmd = hmp_memchar_write, + }, + +STEXI +@item memchar_write @var{device} @var{data} +@findex memchar_write +Provide a write interface for CirMemCharDriver. Write @var{data} +to the char device 'memory'. + +ETEXI + + { + .name = "memchar_read", + .args_type = "device:s,size:i", + .params = "device size", + .help = "Provide a read interface for CirMemCharDriver. Read " + "from it and return data of the given size.", + .mhandler.cmd = hmp_memchar_read, + }, + +STEXI +@item memchar_read @var{device} @var{size} +@findex memchar_read +Provide a read interface for CirMemCharDriver. Read from the char +device 'memory' and return the data. + +@var{size} is the number of bytes to read; it refers to the unencoded +size of the raw data. The request is clamped to the initialization size +of the memchar device if it is larger. + ETEXI { @@ -465,29 +465,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict) return; } - monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20); - if (info->has_mem_swapped_in) { - monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in); - } - if (info->has_mem_swapped_out) { - monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out); - } - if (info->has_major_page_faults) { - monitor_printf(mon, " major_page_faults=%" PRId64, - info->major_page_faults); - } - if (info->has_minor_page_faults) { - monitor_printf(mon, " minor_page_faults=%" PRId64, - info->minor_page_faults); - } - if (info->has_free_mem) { - monitor_printf(mon, " free_mem=%" PRId64, info->free_mem); - } - if (info->has_total_mem) { - monitor_printf(mon, " total_mem=%" PRId64, info->total_mem); - } - - monitor_printf(mon, "\n"); + monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20); qapi_free_BalloonInfo(info); } @@ -684,6 +662,40 @@ void hmp_pmemsave(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &errp); } +void hmp_memchar_write(Monitor *mon, const QDict *qdict) +{ + uint32_t size; + const char *chardev = qdict_get_str(qdict, "device"); + const char *data = qdict_get_str(qdict, "data"); + Error *errp = NULL; + + size = strlen(data); + qmp_memchar_write(chardev, size, data, false, 0, &errp); + + hmp_handle_error(mon, &errp); +} + +void hmp_memchar_read(Monitor *mon, const QDict *qdict) +{ + uint32_t size = qdict_get_int(qdict, "size"); + const char *chardev = qdict_get_str(qdict, "device"); + MemCharRead *meminfo; + Error *errp = NULL; + + meminfo = qmp_memchar_read(chardev, size, false, 0, &errp); + if (errp) { + monitor_printf(mon, "%s\n", error_get_pretty(errp)); + error_free(errp); + return; + } + + if (meminfo->count > 0) { + monitor_printf(mon, "%s\n", meminfo->data); + } + + qapi_free_MemCharRead(meminfo); +} + static void hmp_cont_cb(void *opaque, int err) { if (!err) { @@ -796,7 +808,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ?
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, + true, mode, false, 0, false, 0, false, 0, false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } @@ -43,6 +43,8 @@ void hmp_system_powerdown(Monitor *mon, const QDict *qdict); void hmp_cpu(Monitor *mon, const QDict *qdict); void hmp_memsave(Monitor *mon, const QDict *qdict); void hmp_pmemsave(Monitor *mon, const QDict *qdict); +void hmp_memchar_write(Monitor *mon, const QDict *qdict); +void hmp_memchar_read(Monitor *mon, const QDict *qdict); void hmp_cont(Monitor *mon, const QDict *qdict); void hmp_system_wakeup(Monitor *mon, const QDict *qdict); void hmp_inject_nmi(Monitor *mon, const QDict *qdict); diff --git a/hw/boards.h b/hw/boards.h index 3ff9665..3813d4e 100644 --- a/hw/boards.h +++ b/hw/boards.h @@ -33,6 +33,7 @@ typedef struct QEMUMachine { unsigned int no_serial:1, no_parallel:1, use_virtcon:1, + use_sclp:1, no_floppy:1, no_cdrom:1, no_sdcard:1; diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c index bdcd836..02618f2 100644 --- a/hw/fw_cfg.c +++ b/hw/fw_cfg.c @@ -504,7 +504,6 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC)); fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); - fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); fw_cfg_bootsplash(s); fw_cfg_reboot(s); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 21f50ea..ad0094f 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -241,7 +241,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) if ((pr->cmd & PORT_CMD_FIS_ON) && !s->dev[port].init_d2h_sent) { ahci_init_d2h(&s->dev[port]); - s->dev[port].init_d2h_sent = 1; + s->dev[port].init_d2h_sent = true; } check_cmd(s, port); @@ -494,7 +494,7 @@ static void ahci_reset_port(AHCIState *s, int port) pr->scr_err = 0; pr->scr_act = 0; d->busy_slot = -1; - d->init_d2h_sent = 0; + d->init_d2h_sent = false; ide_state = &s->dev[port].port.ifs[0]; if (!ide_state->bs) { @@ -946,7 +946,7 @@ static int handle_cmd(AHCIState *s, int port, int slot) ide_state->hcyl = 0xeb; debug_print_fis(ide_state->io_buffer, 0x10); ide_state->feature = IDE_FEATURE_DMA; - s->dev[port].done_atapi_packet = 0; + s->dev[port].done_atapi_packet = false; /* XXX send PIO setup FIS */ } @@ -991,7 +991,7 @@ static int ahci_start_transfer(IDEDMA *dma) if (is_atapi && !ad->done_atapi_packet) { /* already prepopulated iobuffer */ - ad->done_atapi_packet = 1; + ad->done_atapi_packet = true; goto out; } @@ -1035,11 +1035,10 @@ out: static void ahci_start_dma(IDEDMA *dma, IDEState *s, BlockDriverCompletionFunc *dma_cb) { +#ifdef DEBUG_AHCI AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); - +#endif DPRINTF(ad->port_no, "\n"); - ad->dma_cb = dma_cb; - ad->dma_status |= BM_STATUS_DMAING; s->io_buffer_offset = 0; dma_cb(s, 0); } @@ -1095,7 +1094,6 @@ static int ahci_dma_set_unit(IDEDMA *dma, int unit) static int ahci_dma_add_status(IDEDMA *dma, int status) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); - ad->dma_status |= status; DPRINTF(ad->port_no, "set status: %x\n", status); if (status & BM_STATUS_INT) { @@ -1114,8 +1112,6 @@ static int ahci_dma_set_inactive(IDEDMA *dma) /* update d2h status */ ahci_write_fis_d2h(ad, NULL); - ad->dma_cb = NULL; - if (!ad->check_bh) { /* maybe we still have something to process, check later */ ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); @@ -1203,6 +1199,82 @@ void 
ahci_reset(AHCIState *s) } } +static const VMStateDescription vmstate_ahci_device = { + .name = "ahci port", + .version_id = 1, + .fields = (VMStateField []) { + VMSTATE_IDE_BUS(port, AHCIDevice), + VMSTATE_UINT32(port_state, AHCIDevice), + VMSTATE_UINT32(finished, AHCIDevice), + VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice), + VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice), + VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice), + VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice), + VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice), + VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice), + VMSTATE_UINT32(port_regs.cmd, AHCIDevice), + VMSTATE_UINT32(port_regs.tfdata, AHCIDevice), + VMSTATE_UINT32(port_regs.sig, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_err, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_act, AHCIDevice), + VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice), + VMSTATE_BOOL(done_atapi_packet, AHCIDevice), + VMSTATE_INT32(busy_slot, AHCIDevice), + VMSTATE_BOOL(init_d2h_sent, AHCIDevice), + VMSTATE_END_OF_LIST() + }, +}; + +static int ahci_state_post_load(void *opaque, int version_id) +{ + int i; + struct AHCIDevice *ad; + AHCIState *s = opaque; + + for (i = 0; i < s->ports; i++) { + ad = &s->dev[i]; + AHCIPortRegs *pr = &ad->port_regs; + + map_page(&ad->lst, + ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024); + map_page(&ad->res_fis, + ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); + /* + * All pending i/o should be flushed out on a migrate. However, + * we might not have cleared the busy_slot since this is done + * in a bh. Also, issue i/o against any slots that are pending. + */ + if ((ad->busy_slot != -1) && + !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) { + pr->cmd_issue &= ~(1 << ad->busy_slot); + ad->busy_slot = -1; + } + check_cmd(s, i); + } + + return 0; +} + +const VMStateDescription vmstate_ahci = { + .name = "ahci", + .version_id = 1, + .post_load = ahci_state_post_load, + .fields = (VMStateField []) { + VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports, + vmstate_ahci_device, AHCIDevice), + VMSTATE_UINT32(control_regs.cap, AHCIState), + VMSTATE_UINT32(control_regs.ghc, AHCIState), + VMSTATE_UINT32(control_regs.irqstatus, AHCIState), + VMSTATE_UINT32(control_regs.impl, AHCIState), + VMSTATE_UINT32(control_regs.version, AHCIState), + VMSTATE_UINT32(idp_index, AHCIState), + VMSTATE_INT32(ports, AHCIState), + VMSTATE_END_OF_LIST() + }, +}; + typedef struct SysbusAHCIState { SysBusDevice busdev; AHCIState ahci; @@ -1211,7 +1283,11 @@ typedef struct SysbusAHCIState { static const VMStateDescription vmstate_sysbus_ahci = { .name = "sysbus-ahci", - .unmigratable = 1, + .unmigratable = 1, /* Still buggy under I/O load */ + .fields = (VMStateField []) { + VMSTATE_AHCI(ahci, AHCIPCIState), + VMSTATE_END_OF_LIST() + }, }; static void sysbus_ahci_reset(DeviceState *dev) diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index 1200a56..85f37fe 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -281,11 +281,9 @@ struct AHCIDevice { QEMUBH *check_bh; uint8_t *lst; uint8_t *res_fis; - int dma_status; - int done_atapi_packet; - int busy_slot; - int init_d2h_sent; - BlockDriverCompletionFunc *dma_cb; + bool done_atapi_packet; + int32_t busy_slot; + bool init_d2h_sent; AHCICmdHdr *cur_cmd; NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; }; @@ -297,7 +295,7 @@ typedef struct AHCIState { MemoryRegion idp; /* Index-Data Pair I/O port space */ unsigned idp_offset; /* Offset of index in I/O 
port space */ uint32_t idp_index; /* Current IDP index */ - int ports; + int32_t ports; qemu_irq irq; DMAContext *dma; } AHCIState; @@ -307,6 +305,16 @@ typedef struct AHCIPCIState { AHCIState ahci; } AHCIPCIState; +extern const VMStateDescription vmstate_ahci; + +#define VMSTATE_AHCI(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(AHCIState), \ + .vmsd = &vmstate_ahci, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, AHCIState), \ +} + typedef struct NCQFrame { uint8_t fis_type; uint8_t c; diff --git a/hw/ide/core.c b/hw/ide/core.c index 14ad079..3743dc3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1149,8 +1149,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) } ide_set_irq(s->bus); break; + case WIN_VERIFY_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_VERIFY: case WIN_VERIFY_ONCE: /* do sector number check ? */ @@ -1158,8 +1160,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); break; + case WIN_READ_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READ: case WIN_READ_ONCE: if (s->drive_kind == IDE_CD) { @@ -1173,8 +1177,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->req_nb_sectors = 1; ide_sector_read(s); break; + case WIN_WRITE_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_WRITE: case WIN_WRITE_ONCE: case CFA_WRITE_SECT_WO_ERASE: @@ -1189,8 +1195,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_transfer_start(s, s->io_buffer, 512, ide_sector_write); s->media_changed = 1; break; + case WIN_MULTREAD_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_MULTREAD: if (!s->bs) { goto abort_cmd; @@ -1202,8 +1210,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->req_nb_sectors = s->mult_sectors; ide_sector_read(s); break; + case WIN_MULTWRITE_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_MULTWRITE: case CFA_WRITE_MULTI_WO_ERASE: if (!s->bs) { @@ -1222,8 +1232,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write); s->media_changed = 1; break; + case WIN_READDMA_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READDMA: case WIN_READDMA_ONCE: if (!s->bs) { @@ -1232,8 +1244,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_cmd_lba48_transform(s, lba48); ide_sector_start_dma(s, IDE_DMA_READ); break; + case WIN_WRITEDMA_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_WRITEDMA: case WIN_WRITEDMA_ONCE: if (!s->bs) { @@ -1243,14 +1257,17 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_sector_start_dma(s, IDE_DMA_WRITE); s->media_changed = 1; break; + case WIN_READ_NATIVE_MAX_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READ_NATIVE_MAX: ide_cmd_lba48_transform(s, lba48); ide_set_sector(s, s->nb_sectors - 1); s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); break; + case WIN_CHECKPOWERMODE1: case WIN_CHECKPOWERMODE2: s->error = 0; diff --git a/hw/ide/ich.c b/hw/ide/ich.c index 1fb803d..cc30adc 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -79,9 +79,15 @@ #define ICH9_IDP_INDEX 0x10 #define ICH9_IDP_INDEX_LOG2 0x04 -static const VMStateDescription vmstate_ahci = { - .name = "ahci", - .unmigratable = 1, +static const VMStateDescription vmstate_ich9_ahci = { + .name = "ich9_ahci", + .unmigratable = 1, /* Still buggy under I/O load */ + .version_id = 1, + .fields = (VMStateField []) { + VMSTATE_PCI_DEVICE(card, AHCIPCIState), + VMSTATE_AHCI(ahci, AHCIPCIState), + VMSTATE_END_OF_LIST() + }, }; static 
void pci_ich9_reset(DeviceState *dev) @@ -152,7 +158,7 @@ static void ich_ahci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82801IR; k->revision = 0x02; k->class_id = PCI_CLASS_STORAGE_SATA; - dc->vmsd = &vmstate_ahci; + dc->vmsd = &vmstate_ich9_ahci; dc->reset = pci_ich9_reset; } diff --git a/hw/m25p80.c b/hw/m25p80.c index d392656..788c196 100644 --- a/hw/m25p80.c +++ b/hw/m25p80.c @@ -358,6 +358,8 @@ static void complete_collecting_data(Flash *s) s->cur_addr |= s->data[1] << 8; s->cur_addr |= s->data[2]; + s->state = STATE_IDLE; + switch (s->cmd_in_progress) { case DPP: case QPP: @@ -551,6 +551,18 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) return index; } +/* Calculates the limit to CPU APIC ID values + * + * This function returns the limit for the APIC ID value, so that all + * CPU APIC IDs are < pc_apic_id_limit(). + * + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + */ +static unsigned int pc_apic_id_limit(unsigned int max_cpus) +{ + return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; +} + static void *bochs_bios_init(void) { void *fw_cfg; @@ -558,9 +570,24 @@ static void *bochs_bios_init(void) size_t smbios_len; uint64_t *numa_fw_cfg; int i, j; + unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); - + /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: + * + * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug + * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC + * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the + * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS + * may see". + * + * So, this means we must not use max_cpus, here, but the maximum possible + * APIC ID value, plus one. + * + * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is + * the APIC ID, not the "CPU index" + */ + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, @@ -579,21 +606,24 @@ static void *bochs_bios_init(void) * of nodes, one word for each VCPU->node and one word for each node to * hold the amount of memory. 
*/ - numa_fw_cfg = g_new0(uint64_t, 1 + max_cpus + nb_numa_nodes); + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); for (i = 0; i < max_cpus; i++) { + unsigned int apic_id = x86_cpu_apic_id_from_index(i); + assert(apic_id < apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { if (test_bit(i, node_cpumask[j])) { - numa_fw_cfg[i + 1] = cpu_to_le64(j); + numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]); + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + max_cpus + nb_numa_nodes) * sizeof(*numa_fw_cfg)); + (1 + apic_id_limit + nb_numa_nodes) * + sizeof(*numa_fw_cfg)); return fw_cfg; } diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 0a6923d..b9a9b2e 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -235,10 +235,18 @@ static void pc_init_pci(QEMUMachineInitArgs *args) static void pc_init_pci_1_3(QEMUMachineInitArgs *args) { - enable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init_pci(args); } +/* PC machine init function for pc-0.14 to pc-1.2 */ +static void pc_init_pci_1_2(QEMUMachineInitArgs *args) +{ + disable_kvm_pv_eoi(); + pc_init_pci_1_3(args); +} + +/* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args) { ram_addr_t ram_size = args->ram_size; @@ -247,6 +255,8 @@ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args) const char *kernel_cmdline = args->kernel_cmdline; const char *initrd_filename = args->initrd_filename; const char *boot_device = args->boot_device; + disable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init1(get_system_memory(), get_system_io(), ram_size, boot_device, @@ -264,6 +274,8 @@ static void pc_init_isa(QEMUMachineInitArgs *args) const char *boot_device = args->boot_device; if (cpu_model == NULL) cpu_model = "486"; + disable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init1(get_system_memory(), get_system_io(), ram_size, boot_device, @@ -286,7 +298,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = { .name = "pc-i440fx-1.4", .alias = "pc", .desc = "Standard PC (i440FX + PIIX, 1996)", - .init = pc_init_pci_1_3, + .init = pc_init_pci, .max_cpus = 255, .is_default = 1, DEFAULT_MACHINE_OPTIONS, @@ -342,7 +354,7 @@ static QEMUMachine pc_machine_v1_3 = { static QEMUMachine pc_machine_v1_2 = { .name = "pc-1.2", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_2, @@ -386,7 +398,7 @@ static QEMUMachine pc_machine_v1_2 = { static QEMUMachine pc_machine_v1_1 = { .name = "pc-1.1", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_1, @@ -422,7 +434,7 @@ static QEMUMachine pc_machine_v1_1 = { static QEMUMachine pc_machine_v1_0 = { .name = "pc-1.0", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_0, @@ -438,7 +450,7 @@ static QEMUMachine pc_machine_v1_0 = { static QEMUMachine pc_machine_v0_15 = { .name = "pc-0.15", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_0_15, @@ -471,7 +483,7 @@ static QEMUMachine pc_machine_v0_15 = { static QEMUMachine pc_machine_v0_14 = { .name = "pc-0.14", .desc = 
"Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_0_14, diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 6de810b..065ea87 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -413,6 +413,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 9ed303a..2778e45 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -299,6 +299,7 @@ static void ppc_heathrow_init(QEMUMachineInitArgs *args) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 1b40c2e..9f2f419 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -1,8 +1,9 @@ obj-y = s390-virtio-bus.o s390-virtio.o - -obj-y := $(addprefix ../,$(obj-y)) obj-y += s390-virtio-hcall.o obj-y += sclp.o obj-y += event-facility.o obj-y += sclpquiesce.o sclpconsole.o obj-y += ipl.o +obj-y += css.o +obj-y += s390-virtio-ccw.o +obj-y += virtio-ccw.o diff --git a/hw/s390x/css.c b/hw/s390x/css.c new file mode 100644 index 0000000..3244201 --- /dev/null +++ b/hw/s390x/css.c @@ -0,0 +1,1277 @@ +/* + * Channel subsystem base support. + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include <hw/qdev.h> +#include "qemu/bitops.h" +#include "cpu.h" +#include "ioinst.h" +#include "css.h" +#include "trace.h" + +typedef struct CrwContainer { + CRW crw; + QTAILQ_ENTRY(CrwContainer) sibling; +} CrwContainer; + +typedef struct ChpInfo { + uint8_t in_use; + uint8_t type; + uint8_t is_virtual; +} ChpInfo; + +typedef struct SubchSet { + SubchDev *sch[MAX_SCHID + 1]; + unsigned long schids_used[BITS_TO_LONGS(MAX_SCHID + 1)]; + unsigned long devnos_used[BITS_TO_LONGS(MAX_SCHID + 1)]; +} SubchSet; + +typedef struct CssImage { + SubchSet *sch_set[MAX_SSID + 1]; + ChpInfo chpids[MAX_CHPID + 1]; +} CssImage; + +typedef struct ChannelSubSys { + QTAILQ_HEAD(, CrwContainer) pending_crws; + bool do_crw_mchk; + bool crws_lost; + uint8_t max_cssid; + uint8_t max_ssid; + bool chnmon_active; + uint64_t chnmon_area; + CssImage *css[MAX_CSSID + 1]; + uint8_t default_cssid; +} ChannelSubSys; + +static ChannelSubSys *channel_subsys; + +int css_create_css_image(uint8_t cssid, bool default_image) +{ + trace_css_new_image(cssid, default_image ? 
"(default)" : ""); + if (cssid > MAX_CSSID) { + return -EINVAL; + } + if (channel_subsys->css[cssid]) { + return -EBUSY; + } + channel_subsys->css[cssid] = g_malloc0(sizeof(CssImage)); + if (default_image) { + channel_subsys->default_cssid = cssid; + } + return 0; +} + +static uint16_t css_build_subchannel_id(SubchDev *sch) +{ + if (channel_subsys->max_cssid > 0) { + return (sch->cssid << 8) | (1 << 3) | (sch->ssid << 1) | 1; + } + return (sch->ssid << 1) | 1; +} + +static void css_inject_io_interrupt(SubchDev *sch) +{ + S390CPU *cpu = s390_cpu_addr2state(0); + uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; + + trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid, + sch->curr_status.pmcw.intparm, isc, ""); + s390_io_interrupt(cpu, + css_build_subchannel_id(sch), + sch->schid, + sch->curr_status.pmcw.intparm, + (0x80 >> isc) << 24); +} + +void css_conditional_io_interrupt(SubchDev *sch) +{ + /* + * If the subchannel is not currently status pending, make it pending + * with alert status. + */ + if (!(sch->curr_status.scsw.ctrl & SCSW_STCTL_STATUS_PEND)) { + S390CPU *cpu = s390_cpu_addr2state(0); + uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; + + trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid, + sch->curr_status.pmcw.intparm, isc, + "(unsolicited)"); + sch->curr_status.scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; + sch->curr_status.scsw.ctrl |= + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + /* Inject an I/O interrupt. */ + s390_io_interrupt(cpu, + css_build_subchannel_id(sch), + sch->schid, + sch->curr_status.pmcw.intparm, + (0x80 >> isc) << 24); + } +} + +static void sch_handle_clear_func(SubchDev *sch) +{ + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int path; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + /* Reset values prior to 'issueing the clear signal'. */ + p->lpum = 0; + p->pom = 0xff; + s->flags &= ~SCSW_FLAGS_MASK_PNO; + + /* We always 'attempt to issue the clear signal', and we always succeed. */ + sch->orb = NULL; + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + s->ctrl &= ~SCSW_ACTL_CLEAR_PEND; + s->ctrl |= SCSW_STCTL_STATUS_PEND; + + s->dstat = 0; + s->cstat = 0; + p->lpum = path; + +} + +static void sch_handle_halt_func(SubchDev *sch) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int path; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + /* We always 'attempt to issue the halt signal', and we always succeed. 
*/ + sch->orb = NULL; + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + s->ctrl &= ~SCSW_ACTL_HALT_PEND; + s->ctrl |= SCSW_STCTL_STATUS_PEND; + + if ((s->ctrl & (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) || + !((s->ctrl & SCSW_ACTL_START_PEND) || + (s->ctrl & SCSW_ACTL_SUSP))) { + s->dstat = SCSW_DSTAT_DEVICE_END; + } + s->cstat = 0; + p->lpum = path; + +} + +static void copy_sense_id_to_guest(SenseId *dest, SenseId *src) +{ + int i; + + dest->reserved = src->reserved; + dest->cu_type = cpu_to_be16(src->cu_type); + dest->cu_model = src->cu_model; + dest->dev_type = cpu_to_be16(src->dev_type); + dest->dev_model = src->dev_model; + dest->unused = src->unused; + for (i = 0; i < ARRAY_SIZE(dest->ciw); i++) { + dest->ciw[i].type = src->ciw[i].type; + dest->ciw[i].command = src->ciw[i].command; + dest->ciw[i].count = cpu_to_be16(src->ciw[i].count); + } +} + +static CCW1 copy_ccw_from_guest(hwaddr addr) +{ + CCW1 tmp; + CCW1 ret; + + cpu_physical_memory_read(addr, &tmp, sizeof(tmp)); + ret.cmd_code = tmp.cmd_code; + ret.flags = tmp.flags; + ret.count = be16_to_cpu(tmp.count); + ret.cda = be32_to_cpu(tmp.cda); + + return ret; +} + +static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr) +{ + int ret; + bool check_len; + int len; + CCW1 ccw; + + if (!ccw_addr) { + return -EIO; + } + + ccw = copy_ccw_from_guest(ccw_addr); + + /* Check for invalid command codes. */ + if ((ccw.cmd_code & 0x0f) == 0) { + return -EINVAL; + } + if (((ccw.cmd_code & 0x0f) == CCW_CMD_TIC) && + ((ccw.cmd_code & 0xf0) != 0)) { + return -EINVAL; + } + + if (ccw.flags & CCW_FLAG_SUSPEND) { + return -EINPROGRESS; + } + + check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); + + /* Look at the command. */ + switch (ccw.cmd_code) { + case CCW_CMD_NOOP: + /* Nothing to do. */ + ret = 0; + break; + case CCW_CMD_BASIC_SENSE: + if (check_len) { + if (ccw.count != sizeof(sch->sense_data)) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, sizeof(sch->sense_data)); + cpu_physical_memory_write(ccw.cda, sch->sense_data, len); + sch->curr_status.scsw.count = ccw.count - len; + memset(sch->sense_data, 0, sizeof(sch->sense_data)); + ret = 0; + break; + case CCW_CMD_SENSE_ID: + { + SenseId sense_id; + + copy_sense_id_to_guest(&sense_id, &sch->id); + /* Sense ID information is device specific. */ + if (check_len) { + if (ccw.count != sizeof(sense_id)) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, sizeof(sense_id)); + /* + * Only indicate 0xff in the first sense byte if we actually + * have enough space to store at least bytes 0-3. + */ + if (len >= 4) { + sense_id.reserved = 0xff; + } else { + sense_id.reserved = 0; + } + cpu_physical_memory_write(ccw.cda, &sense_id, len); + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + break; + } + case CCW_CMD_TIC: + if (sch->last_cmd_valid && (sch->last_cmd.cmd_code == CCW_CMD_TIC)) { + ret = -EINVAL; + break; + } + if (ccw.flags & (CCW_FLAG_CC | CCW_FLAG_DC)) { + ret = -EINVAL; + break; + } + sch->channel_prog = ccw.cda; + ret = -EAGAIN; + break; + default: + if (sch->ccw_cb) { + /* Handle device specific commands. 
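Command codes the common interpreter does not handle are delegated to the transport's callback; virtio-ccw installs virtio_ccw_cb() here for its virtio-specific channel commands.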
*/ + ret = sch->ccw_cb(sch, ccw); + } else { + ret = -ENOSYS; + } + break; + } + sch->last_cmd = ccw; + sch->last_cmd_valid = true; + if (ret == 0) { + if (ccw.flags & CCW_FLAG_CC) { + sch->channel_prog += 8; + ret = -EAGAIN; + } + } + + return ret; +} + +static void sch_handle_start_func(SubchDev *sch) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + ORB *orb = sch->orb; + int path; + int ret; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + if (!(s->ctrl & SCSW_ACTL_SUSP)) { + /* Look at the orb and try to execute the channel program. */ + p->intparm = orb->intparm; + if (!(orb->lpm & path)) { + /* Generate a deferred cc 3 condition. */ + s->flags |= SCSW_FLAGS_MASK_CC; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); + return; + } + } else { + s->ctrl &= ~(SCSW_ACTL_SUSP | SCSW_ACTL_RESUME_PEND); + } + sch->last_cmd_valid = false; + do { + ret = css_interpret_ccw(sch, sch->channel_prog); + switch (ret) { + case -EAGAIN: + /* ccw chain, continue processing */ + break; + case 0: + /* success */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_STATUS_PEND; + s->dstat = SCSW_DSTAT_CHANNEL_END | SCSW_DSTAT_DEVICE_END; + break; + case -ENOSYS: + /* unsupported command, generate unit check (command reject) */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->dstat = SCSW_DSTAT_UNIT_CHECK; + /* Set sense bit 0 in ecw0. */ + sch->sense_data[0] = 0x80; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EFAULT: + /* memory problem, generate channel data check */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_DATA_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EBUSY: + /* subchannel busy, generate deferred cc 1 */ + s->flags &= ~SCSW_FLAGS_MASK_CC; + s->flags |= (1 << 8); + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EINPROGRESS: + /* channel program has been suspended */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->ctrl |= SCSW_ACTL_SUSP; + break; + default: + /* error, generate channel program check */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_PROG_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + } + } while (ret == -EAGAIN); + +} + +/* + * On real machines, this would run asynchronously to the main vcpus. + * We might want to make some parts of the ssch handling (interpreting + * read/writes) asynchronous later on if we start supporting more than + * our current very simple devices. + */ +static void do_subchannel_work(SubchDev *sch) +{ + + SCSW *s = &sch->curr_status.scsw; + + if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) { + sch_handle_clear_func(sch); + } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) { + sch_handle_halt_func(sch); + } else if (s->ctrl & SCSW_FCTL_START_FUNC) { + sch_handle_start_func(sch); + } else { + /* Cannot happen. 
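This function is only entered after csch/hsch/ssch/rsch has set the corresponding function control bit, so one of the branches above always matches.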
*/ + return; + } + css_inject_io_interrupt(sch); +} + +static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) +{ + int i; + + dest->intparm = cpu_to_be32(src->intparm); + dest->flags = cpu_to_be16(src->flags); + dest->devno = cpu_to_be16(src->devno); + dest->lpm = src->lpm; + dest->pnom = src->pnom; + dest->lpum = src->lpum; + dest->pim = src->pim; + dest->mbi = cpu_to_be16(src->mbi); + dest->pom = src->pom; + dest->pam = src->pam; + for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { + dest->chpid[i] = src->chpid[i]; + } + dest->chars = cpu_to_be32(src->chars); +} + +static void copy_scsw_to_guest(SCSW *dest, const SCSW *src) +{ + dest->flags = cpu_to_be16(src->flags); + dest->ctrl = cpu_to_be16(src->ctrl); + dest->cpa = cpu_to_be32(src->cpa); + dest->dstat = src->dstat; + dest->cstat = src->cstat; + dest->count = cpu_to_be16(src->count); +} + +static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) +{ + int i; + + copy_pmcw_to_guest(&dest->pmcw, &src->pmcw); + copy_scsw_to_guest(&dest->scsw, &src->scsw); + dest->mba = cpu_to_be64(src->mba); + for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { + dest->mda[i] = src->mda[i]; + } +} + +int css_do_stsch(SubchDev *sch, SCHIB *schib) +{ + /* Use current status. */ + copy_schib_to_guest(schib, &sch->curr_status); + return 0; +} + +static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src) +{ + int i; + + dest->intparm = be32_to_cpu(src->intparm); + dest->flags = be16_to_cpu(src->flags); + dest->devno = be16_to_cpu(src->devno); + dest->lpm = src->lpm; + dest->pnom = src->pnom; + dest->lpum = src->lpum; + dest->pim = src->pim; + dest->mbi = be16_to_cpu(src->mbi); + dest->pom = src->pom; + dest->pam = src->pam; + for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { + dest->chpid[i] = src->chpid[i]; + } + dest->chars = be32_to_cpu(src->chars); +} + +static void copy_scsw_from_guest(SCSW *dest, const SCSW *src) +{ + dest->flags = be16_to_cpu(src->flags); + dest->ctrl = be16_to_cpu(src->ctrl); + dest->cpa = be32_to_cpu(src->cpa); + dest->dstat = src->dstat; + dest->cstat = src->cstat; + dest->count = be16_to_cpu(src->count); +} + +static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src) +{ + int i; + + copy_pmcw_from_guest(&dest->pmcw, &src->pmcw); + copy_scsw_from_guest(&dest->scsw, &src->scsw); + dest->mba = be64_to_cpu(src->mba); + for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { + dest->mda[i] = src->mda[i]; + } +} + +int css_do_msch(SubchDev *sch, SCHIB *orig_schib) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + SCHIB schib; + + if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_DNV)) { + ret = 0; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & + (SCSW_FCTL_START_FUNC|SCSW_FCTL_HALT_FUNC|SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + copy_schib_from_guest(&schib, orig_schib); + /* Only update the program-modifiable fields. 
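That is: intparm, the ISC/ENA/LM/MME/MP flags, lpm, mbi, pom, the MBFC/CSENSE characteristics and the measurement block address; all other SCHIB fields keep their current values.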
*/ + p->intparm = schib.pmcw.intparm; + p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP); + p->flags |= schib.pmcw.flags & + (PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP); + p->lpm = schib.pmcw.lpm; + p->mbi = schib.pmcw.mbi; + p->pom = schib.pmcw.pom; + p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE); + p->chars |= schib.pmcw.chars & + (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE); + sch->curr_status.mba = schib.mba; + + ret = 0; + +out: + return ret; +} + +int css_do_xsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (!(s->ctrl & SCSW_CTRL_MASK_FCTL) || + ((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) || + (!(s->ctrl & + (SCSW_ACTL_RESUME_PEND | SCSW_ACTL_START_PEND | SCSW_ACTL_SUSP))) || + (s->ctrl & SCSW_ACTL_SUBCH_ACTIVE)) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & SCSW_CTRL_MASK_STCTL) { + ret = -EBUSY; + goto out; + } + + /* Cancel the current operation. */ + s->ctrl &= ~(SCSW_FCTL_START_FUNC | + SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_SUSP); + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; + s->dstat = 0; + s->cstat = 0; + ret = 0; + +out: + return ret; +} + +int css_do_csch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + /* Trigger the clear function. */ + s->ctrl &= ~(SCSW_CTRL_MASK_FCTL | SCSW_CTRL_MASK_ACTL); + s->ctrl |= SCSW_FCTL_CLEAR_FUNC | SCSW_ACTL_CLEAR_PEND; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +int css_do_hsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_STATUS_PEND) || + (s->ctrl & (SCSW_STCTL_PRIMARY | + SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT))) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + /* Trigger the halt function. */ + s->ctrl |= SCSW_FCTL_HALT_FUNC; + s->ctrl &= ~SCSW_FCTL_START_FUNC; + if (((s->ctrl & SCSW_CTRL_MASK_ACTL) == + (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) && + ((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_INTERMEDIATE)) { + s->ctrl &= ~SCSW_STCTL_STATUS_PEND; + } + s->ctrl |= SCSW_ACTL_HALT_PEND; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +static void css_update_chnmon(SubchDev *sch) +{ + if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_MME)) { + /* Not active. */ + return; + } + /* The counter is conveniently located at the beginning of the struct. */ + if (sch->curr_status.pmcw.chars & PMCW_CHARS_MASK_MBFC) { + /* Format 1, per-subchannel area. */ + uint32_t count; + + count = ldl_phys(sch->curr_status.mba); + count++; + stl_phys(sch->curr_status.mba, count); + } else { + /* Format 0, global area. 
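The guest registers one measurement area for all subchannels via SCHM; each subchannel's 32-byte block starts at offset mbi << 5, and only the halfword ssch/rsch count at its beginning is updated here.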
*/ + uint32_t offset; + uint16_t count; + + offset = sch->curr_status.pmcw.mbi << 5; + count = lduw_phys(channel_subsys->chnmon_area + offset); + count++; + stw_phys(channel_subsys->chnmon_area + offset, count); + } +} + +int css_do_ssch(SubchDev *sch, ORB *orb) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & (SCSW_FCTL_START_FUNC | + SCSW_FCTL_HALT_FUNC | + SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + /* If monitoring is active, update counter. */ + if (channel_subsys->chnmon_active) { + css_update_chnmon(sch); + } + sch->orb = orb; + sch->channel_prog = orb->cpa; + /* Trigger the start function. */ + s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND); + s->flags &= ~SCSW_FLAGS_MASK_PNO; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +static void copy_irb_to_guest(IRB *dest, const IRB *src) +{ + int i; + + copy_scsw_to_guest(&dest->scsw, &src->scsw); + + for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { + dest->esw[i] = cpu_to_be32(src->esw[i]); + } + for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { + dest->ecw[i] = cpu_to_be32(src->ecw[i]); + } + for (i = 0; i < ARRAY_SIZE(dest->emw); i++) { + dest->emw[i] = cpu_to_be32(src->emw[i]); + } +} + +int css_do_tsch(SubchDev *sch, IRB *target_irb) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + uint16_t stctl; + uint16_t fctl; + uint16_t actl; + IRB irb; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = 3; + goto out; + } + + stctl = s->ctrl & SCSW_CTRL_MASK_STCTL; + fctl = s->ctrl & SCSW_CTRL_MASK_FCTL; + actl = s->ctrl & SCSW_CTRL_MASK_ACTL; + + /* Prepare the irb for the guest. */ + memset(&irb, 0, sizeof(IRB)); + + /* Copy scsw from current status. */ + memcpy(&irb.scsw, s, sizeof(SCSW)); + if (stctl & SCSW_STCTL_STATUS_PEND) { + if (s->cstat & (SCSW_CSTAT_DATA_CHECK | + SCSW_CSTAT_CHN_CTRL_CHK | + SCSW_CSTAT_INTF_CTRL_CHK)) { + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; + irb.esw[0] = 0x04804000; + } else { + irb.esw[0] = 0x00800000; + } + /* If a unit check is pending, copy sense data. */ + if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && + (p->chars & PMCW_CHARS_MASK_CSENSE)) { + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; + memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); + irb.esw[1] = 0x02000000 | (sizeof(sch->sense_data) << 8); + } + } + /* Store the irb to the guest. */ + copy_irb_to_guest(target_irb, &irb); + + /* Clear conditions on subchannel, if applicable. 
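TSCH both retrieves and acknowledges status: once the IRB has been stored, status pending is cleared and, depending on what completed, the function and activity control bits are reset as well.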
*/ + if (stctl & SCSW_STCTL_STATUS_PEND) { + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + if ((stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) || + ((fctl & SCSW_FCTL_HALT_FUNC) && + (actl & SCSW_ACTL_SUSP))) { + s->ctrl &= ~SCSW_CTRL_MASK_FCTL; + } + if (stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) { + s->flags &= ~SCSW_FLAGS_MASK_PNO; + s->ctrl &= ~(SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_HALT_PEND | + SCSW_ACTL_CLEAR_PEND | + SCSW_ACTL_SUSP); + } else { + if ((actl & SCSW_ACTL_SUSP) && + (fctl & SCSW_FCTL_START_FUNC)) { + s->flags &= ~SCSW_FLAGS_MASK_PNO; + if (fctl & SCSW_FCTL_HALT_FUNC) { + s->ctrl &= ~(SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_HALT_PEND | + SCSW_ACTL_CLEAR_PEND | + SCSW_ACTL_SUSP); + } else { + s->ctrl &= ~SCSW_ACTL_RESUME_PEND; + } + } + } + /* Clear pending sense data. */ + if (p->chars & PMCW_CHARS_MASK_CSENSE) { + memset(sch->sense_data, 0, sizeof(sch->sense_data)); + } + } + + ret = ((stctl & SCSW_STCTL_STATUS_PEND) == 0); + +out: + return ret; +} + +static void copy_crw_to_guest(CRW *dest, const CRW *src) +{ + dest->flags = cpu_to_be16(src->flags); + dest->rsid = cpu_to_be16(src->rsid); +} + +int css_do_stcrw(CRW *crw) +{ + CrwContainer *crw_cont; + int ret; + + crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws); + if (crw_cont) { + QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling); + copy_crw_to_guest(crw, &crw_cont->crw); + g_free(crw_cont); + ret = 0; + } else { + /* List was empty, turn crw machine checks on again. */ + memset(crw, 0, sizeof(*crw)); + channel_subsys->do_crw_mchk = true; + ret = 1; + } + + return ret; +} + +int css_do_tpi(IOIntCode *int_code, int lowcore) +{ + /* No pending interrupts for !KVM. */ + return 0; +} + +int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid, + int rfmt, void *buf) +{ + int i, desc_size; + uint32_t words[8]; + uint32_t chpid_type_word; + CssImage *css; + + if (!m && !cssid) { + css = channel_subsys->css[channel_subsys->default_cssid]; + } else { + css = channel_subsys->css[cssid]; + } + if (!css) { + return 0; + } + desc_size = 0; + for (i = f_chpid; i <= l_chpid; i++) { + if (css->chpids[i].in_use) { + chpid_type_word = 0x80000000 | (css->chpids[i].type << 8) | i; + if (rfmt == 0) { + words[0] = cpu_to_be32(chpid_type_word); + words[1] = 0; + memcpy(buf + desc_size, words, 8); + desc_size += 8; + } else if (rfmt == 1) { + words[0] = cpu_to_be32(chpid_type_word); + words[1] = 0; + words[2] = 0; + words[3] = 0; + words[4] = 0; + words[5] = 0; + words[6] = 0; + words[7] = 0; + memcpy(buf + desc_size, words, 32); + desc_size += 32; + } + } + } + return desc_size; +} + +void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo) +{ + /* dct is currently ignored (not really meaningful for our devices) */ + /* TODO: Don't ignore mbk. */ + if (update && !channel_subsys->chnmon_active) { + /* Enable measuring. */ + channel_subsys->chnmon_area = mbo; + channel_subsys->chnmon_active = true; + } + if (!update && channel_subsys->chnmon_active) { + /* Disable measuring. 
*/ + channel_subsys->chnmon_area = 0; + channel_subsys->chnmon_active = false; + } +} + +int css_do_rsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) || + (s->ctrl & SCSW_ACTL_RESUME_PEND) || + (!(s->ctrl & SCSW_ACTL_SUSP))) { + ret = -EINVAL; + goto out; + } + + /* If monitoring is active, update counter. */ + if (channel_subsys->chnmon_active) { + css_update_chnmon(sch); + } + + s->ctrl |= SCSW_ACTL_RESUME_PEND; + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +int css_do_rchp(uint8_t cssid, uint8_t chpid) +{ + uint8_t real_cssid; + + if (cssid > channel_subsys->max_cssid) { + return -EINVAL; + } + if (channel_subsys->max_cssid == 0) { + real_cssid = channel_subsys->default_cssid; + } else { + real_cssid = cssid; + } + if (!channel_subsys->css[real_cssid]) { + return -EINVAL; + } + + if (!channel_subsys->css[real_cssid]->chpids[chpid].in_use) { + return -ENODEV; + } + + if (!channel_subsys->css[real_cssid]->chpids[chpid].is_virtual) { + fprintf(stderr, + "rchp unsupported for non-virtual chpid %x.%02x!\n", + real_cssid, chpid); + return -ENODEV; + } + + /* We don't really use a channel path, so we're done here. */ + css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, + channel_subsys->max_cssid > 0 ? 1 : 0, chpid); + if (channel_subsys->max_cssid > 0) { + css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8); + } + return 0; +} + +bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + SubchSet *set; + + if (cssid > MAX_CSSID || ssid > MAX_SSID || !channel_subsys->css[cssid] || + !channel_subsys->css[cssid]->sch_set[ssid]) { + return true; + } + set = channel_subsys->css[cssid]->sch_set[ssid]; + return schid > find_last_bit(set->schids_used, + MAX_SCHID + 1); +} + +static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) +{ + CssImage *css; + + trace_css_chpid_add(cssid, chpid, type); + if (cssid > MAX_CSSID) { + return -EINVAL; + } + css = channel_subsys->css[cssid]; + if (!css) { + return -EINVAL; + } + if (css->chpids[chpid].in_use) { + return -EEXIST; + } + css->chpids[chpid].in_use = 1; + css->chpids[chpid].type = type; + css->chpids[chpid].is_virtual = 1; + + css_generate_chp_crws(cssid, chpid); + + return 0; +} + +void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type) +{ + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int i; + CssImage *css = channel_subsys->css[sch->cssid]; + + assert(css != NULL); + memset(p, 0, sizeof(PMCW)); + p->flags |= PMCW_FLAGS_MASK_DNV; + p->devno = sch->devno; + /* single path */ + p->pim = 0x80; + p->pom = 0xff; + p->pam = 0x80; + p->chpid[0] = chpid; + if (!css->chpids[chpid].in_use) { + css_add_virtual_chpid(sch->cssid, chpid, type); + } + + memset(s, 0, sizeof(SCSW)); + sch->curr_status.mba = 0; + for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) { + sch->curr_status.mda[i] = 0; + } +} + +SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + uint8_t real_cssid; + + real_cssid = (!m && (cssid == 0)) ? 
channel_subsys->default_cssid : cssid; + + if (!channel_subsys->css[real_cssid]) { + return NULL; + } + + if (!channel_subsys->css[real_cssid]->sch_set[ssid]) { + return NULL; + } + + return channel_subsys->css[real_cssid]->sch_set[ssid]->sch[schid]; +} + +bool css_subch_visible(SubchDev *sch) +{ + if (sch->ssid > channel_subsys->max_ssid) { + return false; + } + + if (sch->cssid != channel_subsys->default_cssid) { + return (channel_subsys->max_cssid > 0); + } + + return true; +} + +bool css_present(uint8_t cssid) +{ + return (channel_subsys->css[cssid] != NULL); +} + +bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno) +{ + if (!channel_subsys->css[cssid]) { + return false; + } + if (!channel_subsys->css[cssid]->sch_set[ssid]) { + return false; + } + + return !!test_bit(devno, + channel_subsys->css[cssid]->sch_set[ssid]->devnos_used); +} + +void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + uint16_t devno, SubchDev *sch) +{ + CssImage *css; + SubchSet *s_set; + + trace_css_assign_subch(sch ? "assign" : "deassign", cssid, ssid, schid, + devno); + if (!channel_subsys->css[cssid]) { + fprintf(stderr, + "Suspicious call to %s (%x.%x.%04x) for non-existing css!\n", + __func__, cssid, ssid, schid); + return; + } + css = channel_subsys->css[cssid]; + + if (!css->sch_set[ssid]) { + css->sch_set[ssid] = g_malloc0(sizeof(SubchSet)); + } + s_set = css->sch_set[ssid]; + + s_set->sch[schid] = sch; + if (sch) { + set_bit(schid, s_set->schids_used); + set_bit(devno, s_set->devnos_used); + } else { + clear_bit(schid, s_set->schids_used); + clear_bit(devno, s_set->devnos_used); + } +} + +void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid) +{ + CrwContainer *crw_cont; + + trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : ""); + /* TODO: Maybe use a static crw pool? */ + crw_cont = g_try_malloc0(sizeof(CrwContainer)); + if (!crw_cont) { + channel_subsys->crws_lost = true; + return; + } + crw_cont->crw.flags = (rsc << 8) | erc; + if (chain) { + crw_cont->crw.flags |= CRW_FLAGS_MASK_C; + } + crw_cont->crw.rsid = rsid; + if (channel_subsys->crws_lost) { + crw_cont->crw.flags |= CRW_FLAGS_MASK_R; + channel_subsys->crws_lost = false; + } + + QTAILQ_INSERT_TAIL(&channel_subsys->pending_crws, crw_cont, sibling); + + if (channel_subsys->do_crw_mchk) { + S390CPU *cpu = s390_cpu_addr2state(0); + + channel_subsys->do_crw_mchk = false; + /* Inject crw pending machine check. */ + s390_crw_mchk(cpu); + } +} + +void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add) +{ + uint8_t guest_cssid; + bool chain_crw; + + if (add && !hotplugged) { + return; + } + if (channel_subsys->max_cssid == 0) { + /* Default cssid shows up as 0. */ + guest_cssid = (cssid == channel_subsys->default_cssid) ? 0 : cssid; + } else { + /* Show real cssid to the guest. */ + guest_cssid = cssid; + } + /* + * Only notify for higher subchannel sets/channel subsystems if the + * guest has enabled it. + */ + if ((ssid > channel_subsys->max_ssid) || + (guest_cssid > channel_subsys->max_cssid) || + ((channel_subsys->max_cssid == 0) && + (cssid != channel_subsys->default_cssid))) { + return; + } + chain_crw = (channel_subsys->max_ssid > 0) || + (channel_subsys->max_cssid > 0); + css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, chain_crw ? 
1 : 0, schid); + if (chain_crw) { + css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0, + (guest_cssid << 8) | (ssid << 4)); + } +} + +void css_generate_chp_crws(uint8_t cssid, uint8_t chpid) +{ + /* TODO */ +} + +int css_enable_mcsse(void) +{ + trace_css_enable_facility("mcsse"); + channel_subsys->max_cssid = MAX_CSSID; + return 0; +} + +int css_enable_mss(void) +{ + trace_css_enable_facility("mss"); + channel_subsys->max_ssid = MAX_SSID; + return 0; +} + +static void css_init(void) +{ + channel_subsys = g_malloc0(sizeof(*channel_subsys)); + QTAILQ_INIT(&channel_subsys->pending_crws); + channel_subsys->do_crw_mchk = true; + channel_subsys->crws_lost = false; + channel_subsys->chnmon_active = false; +} +machine_init(css_init); + +void css_reset_sch(SubchDev *sch) +{ + PMCW *p = &sch->curr_status.pmcw; + + p->intparm = 0; + p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP | PMCW_FLAGS_MASK_TF); + p->flags |= PMCW_FLAGS_MASK_DNV; + p->devno = sch->devno; + p->pim = 0x80; + p->lpm = p->pim; + p->pnom = 0; + p->lpum = 0; + p->mbi = 0; + p->pom = 0xff; + p->pam = 0x80; + p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_XMWME | + PMCW_CHARS_MASK_CSENSE); + + memset(&sch->curr_status.scsw, 0, sizeof(sch->curr_status.scsw)); + sch->curr_status.mba = 0; + + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; +} + +void css_reset(void) +{ + CrwContainer *crw_cont; + + /* Clean up monitoring. */ + channel_subsys->chnmon_active = false; + channel_subsys->chnmon_area = 0; + + /* Clear pending CRWs. */ + while ((crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws))) { + QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling); + g_free(crw_cont); + } + channel_subsys->do_crw_mchk = true; + channel_subsys->crws_lost = false; + + /* Reset maximum ids. */ + channel_subsys->max_cssid = 0; + channel_subsys->max_ssid = 0; +} diff --git a/hw/s390x/css.h b/hw/s390x/css.h new file mode 100644 index 0000000..85ed05d --- /dev/null +++ b/hw/s390x/css.h @@ -0,0 +1,99 @@ +/* + * Channel subsystem structures and definitions. + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef CSS_H +#define CSS_H + +#include "ioinst.h" + +/* Channel subsystem constants. */ +#define MAX_SCHID 65535 +#define MAX_SSID 3 +#define MAX_CSSID 254 /* 255 is reserved */ +#define MAX_CHPID 255 + +#define MAX_CIWS 62 + +typedef struct CIW { + uint8_t type; + uint8_t command; + uint16_t count; +} QEMU_PACKED CIW; + +typedef struct SenseId { + /* common part */ + uint8_t reserved; /* always 0x'FF' */ + uint16_t cu_type; /* control unit type */ + uint8_t cu_model; /* control unit model */ + uint16_t dev_type; /* device type */ + uint8_t dev_model; /* device model */ + uint8_t unused; /* padding byte */ + /* extended part */ + CIW ciw[MAX_CIWS]; /* variable # of CIWs */ +} QEMU_PACKED SenseId; + +/* Channel measurements, from linux/drivers/s390/cio/cmf.c. 
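CMB below is the basic (format 0) measurement block, CMBE the extended (format 1) block selected via PMCW_CHARS_MASK_MBFC.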
*/ +typedef struct CMB { + uint16_t ssch_rsch_count; + uint16_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t reserved[2]; +} QEMU_PACKED CMB; + +typedef struct CMBE { + uint32_t ssch_rsch_count; + uint32_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t device_busy_time; + uint32_t initial_command_response_time; + uint32_t reserved[7]; +} QEMU_PACKED CMBE; + +struct SubchDev { + /* channel-subsystem related things: */ + uint8_t cssid; + uint8_t ssid; + uint16_t schid; + uint16_t devno; + SCHIB curr_status; + uint8_t sense_data[32]; + hwaddr channel_prog; + CCW1 last_cmd; + bool last_cmd_valid; + ORB *orb; + /* transport-provided data: */ + int (*ccw_cb) (SubchDev *, CCW1); + SenseId id; + void *driver_data; +}; + +typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +int css_create_css_image(uint8_t cssid, bool default_image); +bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno); +void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + uint16_t devno, SubchDev *sch); +void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type); +void css_reset(void); +void css_reset_sch(SubchDev *sch); +void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid); +void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add); +void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); +#endif diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 7cbbf99..86e8415 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -159,7 +159,7 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data) dc->no_user = 1; } -static TypeInfo s390_ipl_info = { +static const TypeInfo s390_ipl_info = { .class_init = s390_ipl_class_init, .parent = TYPE_SYS_BUS_DEVICE, .name = "s390-ipl", diff --git a/hw/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c index b5d1f2b..32f63b0 100644 --- a/hw/s390-virtio-bus.c +++ b/hw/s390x/s390-virtio-bus.c @@ -17,12 +17,12 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "hw.h" +#include "hw/hw.h" #include "block/block.h" #include "sysemu/sysemu.h" -#include "boards.h" +#include "hw/boards.h" #include "monitor/monitor.h" -#include "loader.h" +#include "hw/loader.h" #include "elf.h" #include "hw/virtio.h" #include "hw/virtio-rng.h" @@ -31,7 +31,7 @@ #include "hw/sysbus.h" #include "sysemu/kvm.h" -#include "hw/s390-virtio-bus.h" +#include "hw/s390x/s390-virtio-bus.h" #include "hw/virtio-bus.h" /* #define DEBUG_S390 */ @@ -508,6 +508,13 @@ static int s390_virtio_busdev_init(DeviceState *dev) return _info->init(_dev); } +static void s390_virtio_busdev_reset(DeviceState *dev) +{ + VirtIOS390Device *_dev = (VirtIOS390Device *)dev; + + virtio_reset(_dev->vdev); +} + static void virtio_s390_device_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -515,6 +522,7 @@ static void virtio_s390_device_class_init(ObjectClass *klass, void *data) dc->init = s390_virtio_busdev_init; dc->bus_type = TYPE_S390_VIRTIO_BUS; dc->unplug = qdev_simple_unplug_cb; + dc->reset = s390_virtio_busdev_reset; } static const TypeInfo virtio_s390_device_info = { diff --git a/hw/s390-virtio-bus.h b/hw/s390x/s390-virtio-bus.h index 438b37f..4aacf83 100644 --- a/hw/s390-virtio-bus.h +++ b/hw/s390x/s390-virtio-bus.h @@ -19,12 +19,12 @@ #ifndef HW_S390_VIRTIO_BUS_H #define HW_S390_VIRTIO_BUS_H 1 -#include "virtio-blk.h" -#include "virtio-net.h" -#include "virtio-rng.h" -#include "virtio-serial.h" -#include "virtio-scsi.h" -#include "virtio-bus.h" +#include "hw/virtio-blk.h" +#include "hw/virtio-net.h" +#include "hw/virtio-rng.h" +#include "hw/virtio-serial.h" +#include "hw/virtio-scsi.h" +#include "hw/virtio-bus.h" #define VIRTIO_DEV_OFFS_TYPE 0 /* 8 bits */ #define VIRTIO_DEV_OFFS_NUM_VQ 1 /* 8 bits */ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c new file mode 100644 index 0000000..6549211 --- /dev/null +++ b/hw/s390x/s390-virtio-ccw.c @@ -0,0 +1,134 @@ +/* + * virtio ccw machine + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "hw/boards.h" +#include "exec/address-spaces.h" +#include "s390-virtio.h" +#include "sclp.h" +#include "ioinst.h" +#include "css.h" +#include "virtio-ccw.h" + +static int virtio_ccw_hcall_notify(const uint64_t *args) +{ + uint64_t subch_id = args[0]; + uint64_t queue = args[1]; + SubchDev *sch; + int cssid, ssid, schid, m; + + if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) { + return -EINVAL; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (!sch || !css_subch_visible(sch)) { + return -EINVAL; + } + virtio_queue_notify(virtio_ccw_get_vdev(sch), queue); + return 0; + +} + +static int virtio_ccw_hcall_early_printk(const uint64_t *args) +{ + uint64_t mem = args[0]; + + if (mem < ram_size) { + /* Early printk */ + return 0; + } + return -EINVAL; +} + +static void virtio_ccw_register_hcalls(void) +{ + s390_register_virtio_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, + virtio_ccw_hcall_notify); + /* Tolerate early printk. 
*/ + s390_register_virtio_hypercall(KVM_S390_VIRTIO_NOTIFY, + virtio_ccw_hcall_early_printk); +} + +static void ccw_init(QEMUMachineInitArgs *args) +{ + ram_addr_t my_ram_size = args->ram_size; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *ram = g_new(MemoryRegion, 1); + int shift = 0; + uint8_t *storage_keys; + int ret; + VirtualCssBus *css_bus; + + /* s390x ram size detection needs a 16bit multiplier + an increment. So + guests > 64GB can be specified in 2MB steps etc. */ + while ((my_ram_size >> (20 + shift)) > 65535) { + shift++; + } + my_ram_size = my_ram_size >> (20 + shift) << (20 + shift); + + /* let's propagate the changed ram size into the global variable. */ + ram_size = my_ram_size; + + /* get a BUS */ + css_bus = virtual_css_bus_init(); + s390_sclp_init(); + s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline, + args->initrd_filename); + + /* register hypercalls */ + virtio_ccw_register_hcalls(); + + /* allocate RAM */ + memory_region_init_ram(ram, "s390.ram", my_ram_size); + vmstate_register_ram_global(ram); + memory_region_add_subregion(sysmem, 0, ram); + + /* allocate storage keys */ + storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE); + + /* init CPUs */ + s390_init_cpus(args->cpu_model, storage_keys); + + if (kvm_enabled()) { + kvm_s390_enable_css_support(s390_cpu_addr2state(0)); + } + /* + * Create virtual css and set it as default so that non mcss-e + * enabled guests only see virtio devices. + */ + ret = css_create_css_image(VIRTUAL_CSSID, true); + assert(ret == 0); + + /* Create VirtIO network adapters */ + s390_create_virtio_net(BUS(css_bus), "virtio-net-ccw"); +} + +static QEMUMachine ccw_machine = { + .name = "s390-ccw-virtio", + .alias = "s390-ccw", + .desc = "VirtIO-ccw based S390 machine", + .init = ccw_init, + .block_default_type = IF_VIRTIO, + .no_cdrom = 1, + .no_floppy = 1, + .no_serial = 1, + .no_parallel = 1, + .no_sdcard = 1, + .use_sclp = 1, + .max_cpus = 255, + DEFAULT_MACHINE_OPTIONS, +}; + +static void ccw_machine_init(void) +{ + qemu_register_machine(&ccw_machine); +} + +machine_init(ccw_machine_init) diff --git a/hw/s390x/s390-virtio-hcall.c b/hw/s390x/s390-virtio-hcall.c index d7938c0..ee62649 100644 --- a/hw/s390x/s390-virtio-hcall.c +++ b/hw/s390x/s390-virtio-hcall.c @@ -10,7 +10,7 @@ */ #include "cpu.h" -#include "hw/s390-virtio.h" +#include "hw/s390x/s390-virtio.h" #define MAX_DIAG_SUBCODES 255 diff --git a/hw/s390-virtio.c b/hw/s390x/s390-virtio.c index 5edaabb..2a1d9ac 100644 --- a/hw/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -21,22 +21,22 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "hw.h" +#include "hw/hw.h" #include "block/block.h" #include "sysemu/blockdev.h" #include "sysemu/sysemu.h" #include "net/net.h" -#include "boards.h" +#include "hw/boards.h" #include "monitor/monitor.h" -#include "loader.h" +#include "hw/loader.h" #include "hw/virtio.h" #include "hw/sysbus.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" -#include "hw/s390-virtio-bus.h" +#include "hw/s390x/s390-virtio-bus.h" #include "hw/s390x/sclp.h" -#include "hw/s390-virtio.h" +#include "hw/s390x/s390-virtio.h" //#define DEBUG_S390 @@ -86,6 +86,9 @@ static int s390_virtio_hcall_reset(const uint64_t *args) VirtIOS390Device *dev; dev = s390_virtio_bus_find_mem(s390_bus, mem); + if (dev == NULL) { + return -EINVAL; + } virtio_reset(dev->vdev); stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0); s390_virtio_device_sync(dev); @@ -147,13 +150,73 @@ unsigned s390_del_running_cpu(CPUS390XState *env) return s390_running_cpus; } +void s390_init_ipl_dev(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename) +{ + DeviceState *dev; + + dev = qdev_create(NULL, "s390-ipl"); + if (kernel_filename) { + qdev_prop_set_string(dev, "kernel", kernel_filename); + } + if (initrd_filename) { + qdev_prop_set_string(dev, "initrd", initrd_filename); + } + qdev_prop_set_string(dev, "cmdline", kernel_cmdline); + qdev_init_nofail(dev); +} + +void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys) +{ + int i; + + if (cpu_model == NULL) { + cpu_model = "host"; + } + + ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); + + for (i = 0; i < smp_cpus; i++) { + S390CPU *cpu; + + cpu = cpu_s390x_init(cpu_model); + + ipi_states[i] = cpu; + cpu->env.halted = 1; + cpu->env.exception_index = EXCP_HLT; + cpu->env.storage_keys = storage_keys; + } +} + + +void s390_create_virtio_net(BusState *bus, const char *name) +{ + int i; + + for (i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + DeviceState *dev; + + if (!nd->model) { + nd->model = g_strdup("virtio"); + } + + if (strcmp(nd->model, "virtio")) { + fprintf(stderr, "S390 only supports VirtIO nics\n"); + exit(1); + } + + dev = qdev_create(bus, name); + qdev_set_nic_properties(dev, nd); + qdev_init_nofail(dev); + } +} + /* PC hardware initialisation */ static void s390_init(QEMUMachineInitArgs *args) { ram_addr_t my_ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - CPUS390XState *env = NULL; - DeviceState *dev; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); int shift = 0; @@ -161,7 +224,6 @@ static void s390_init(QEMUMachineInitArgs *args) void *virtio_region; hwaddr virtio_region_len; hwaddr virtio_region_start; - int i; /* s390x ram size detection needs a 16bit multiplier + an increment. So guests > 64GB can be specified in 2MB steps etc. 
*/ @@ -176,15 +238,8 @@ static void s390_init(QEMUMachineInitArgs *args) /* get a BUS */ s390_bus = s390_virtio_bus_init(&my_ram_size); s390_sclp_init(); - dev = qdev_create(NULL, "s390-ipl"); - if (args->kernel_filename) { - qdev_prop_set_string(dev, "kernel", args->kernel_filename); - } - if (args->initrd_filename) { - qdev_prop_set_string(dev, "initrd", args->initrd_filename); - } - qdev_prop_set_string(dev, "cmdline", args->kernel_cmdline); - qdev_init_nofail(dev); + s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline, + args->initrd_filename); /* register hypercalls */ s390_virtio_register_hcalls(); @@ -207,46 +262,10 @@ static void s390_init(QEMUMachineInitArgs *args) storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE); /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = "host"; - } - - ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); - - for (i = 0; i < smp_cpus; i++) { - S390CPU *cpu; - CPUS390XState *tmp_env; - - cpu = cpu_s390x_init(cpu_model); - tmp_env = &cpu->env; - if (!env) { - env = tmp_env; - } - ipi_states[i] = cpu; - tmp_env->halted = 1; - tmp_env->exception_index = EXCP_HLT; - tmp_env->storage_keys = storage_keys; - } - + s390_init_cpus(args->cpu_model, storage_keys); /* Create VirtIO network adapters */ - for(i = 0; i < nb_nics; i++) { - NICInfo *nd = &nd_table[i]; - DeviceState *dev; - - if (!nd->model) { - nd->model = g_strdup("virtio"); - } - - if (strcmp(nd->model, "virtio")) { - fprintf(stderr, "S390 only supports VirtIO nics\n"); - exit(1); - } - - dev = qdev_create((BusState *)s390_bus, "virtio-net-s390"); - qdev_set_nic_properties(dev, nd); - qdev_init_nofail(dev); - } + s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390"); } static QEMUMachine s390_machine = { diff --git a/hw/s390-virtio.h b/hw/s390x/s390-virtio.h index 25bb610..a6c4c19 100644 --- a/hw/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -15,8 +15,14 @@ #define KVM_S390_VIRTIO_NOTIFY 0 #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 typedef int (*s390_virtio_fn)(const uint64_t *args); void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn); +void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys); +void s390_init_ipl_dev(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename); +void s390_create_virtio_net(BusState *bus, const char *name); #endif diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c new file mode 100644 index 0000000..231f81e --- /dev/null +++ b/hw/s390x/virtio-ccw.c @@ -0,0 +1,960 @@ +/* + * virtio ccw target implementation + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ */ + +#include "hw/hw.h" +#include "block/block.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "monitor/monitor.h" +#include "hw/virtio.h" +#include "hw/virtio-serial.h" +#include "hw/virtio-net.h" +#include "hw/sysbus.h" +#include "qemu/bitops.h" +#include "hw/virtio-bus.h" + +#include "ioinst.h" +#include "css.h" +#include "virtio-ccw.h" +#include "trace.h" + +static int virtual_css_bus_reset(BusState *qbus) +{ + /* This should actually be modelled via the generic css */ + css_reset(); + + /* we don't traverse ourselves, return 0 */ + return 0; +} + + +static void virtual_css_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + k->reset = virtual_css_bus_reset; +} + +static const TypeInfo virtual_css_bus_info = { + .name = TYPE_VIRTUAL_CSS_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(VirtualCssBus), + .class_init = virtual_css_bus_class_init, +}; + +static const VirtIOBindings virtio_ccw_bindings; + +VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch) +{ + VirtIODevice *vdev = NULL; + + if (sch->driver_data) { + vdev = ((VirtioCcwDevice *)sch->driver_data)->vdev; + } + return vdev; +} + +VirtualCssBus *virtual_css_bus_init(void) +{ + VirtualCssBus *cbus; + BusState *bus; + DeviceState *dev; + + /* Create bridge device */ + dev = qdev_create(NULL, "virtual-css-bridge"); + qdev_init_nofail(dev); + + /* Create bus on bridge device */ + bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); + cbus = VIRTUAL_CSS_BUS(bus); + + /* Enable hotplugging */ + bus->allow_hotplug = 1; + + return cbus; +} + +/* Communication blocks used by several channel commands. */ +typedef struct VqInfoBlock { + uint64_t queue; + uint32_t align; + uint16_t index; + uint16_t num; +} QEMU_PACKED VqInfoBlock; + +typedef struct VqConfigBlock { + uint16_t index; + uint16_t num_max; +} QEMU_PACKED VqConfigBlock; + +typedef struct VirtioFeatDesc { + uint32_t features; + uint8_t index; +} QEMU_PACKED VirtioFeatDesc; + +/* Specify where the virtqueues for the subchannel are in guest memory. */ +static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, + uint16_t index, uint16_t num) +{ + VirtioCcwDevice *dev = sch->driver_data; + + if (index >= VIRTIO_PCI_QUEUE_MAX) { + return -EINVAL; + } + + /* Current code in virtio.c relies on 4K alignment. */ + if (addr && (align != 4096)) { + return -EINVAL; + } + + if (!dev) { + return -EINVAL; + } + + virtio_queue_set_addr(dev->vdev, index, addr); + if (!addr) { + virtio_queue_set_vector(dev->vdev, index, 0); + } else { + /* Fail if we don't have a big enough queue. */ + /* TODO: Add interface to handle vring.num changing */ + if (virtio_queue_get_num(dev->vdev, index) > num) { + return -EINVAL; + } + virtio_queue_set_vector(dev->vdev, index, index); + } + /* tell notify handler in case of config change */ + dev->vdev->config_vector = VIRTIO_PCI_QUEUE_MAX; + return 0; +} + +static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) +{ + int ret; + VqInfoBlock info; + uint8_t status; + VirtioFeatDesc features; + void *config; + hwaddr indicators; + VqConfigBlock vq_config; + VirtioCcwDevice *dev = sch->driver_data; + bool check_len; + int len; + hwaddr hw_len; + + if (!dev) { + return -EINVAL; + } + + trace_virtio_ccw_interpret_ccw(sch->cssid, sch->ssid, sch->schid, + ccw.cmd_code); + check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); + + /* Look at the command. 
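This dispatches the virtio-ccw specific command codes (SET_VQ, the FEAT/CONF/STATUS/IND commands and READ_VQ_CONF); basic commands like SENSE_ID have already been handled by the common interpreter in css.c.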
*/ + switch (ccw.cmd_code) { + case CCW_CMD_SET_VQ: + if (check_len) { + if (ccw.count != sizeof(info)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(info)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + info.queue = ldq_phys(ccw.cda); + info.align = ldl_phys(ccw.cda + sizeof(info.queue)); + info.index = lduw_phys(ccw.cda + sizeof(info.queue) + + sizeof(info.align)); + info.num = lduw_phys(ccw.cda + sizeof(info.queue) + + sizeof(info.align) + + sizeof(info.index)); + ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index, + info.num); + sch->curr_status.scsw.count = 0; + } + break; + case CCW_CMD_VDEV_RESET: + virtio_reset(dev->vdev); + ret = 0; + break; + case CCW_CMD_READ_FEAT: + if (check_len) { + if (ccw.count != sizeof(features)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(features)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + features.index = ldub_phys(ccw.cda + sizeof(features.features)); + if (features.index < ARRAY_SIZE(dev->host_features)) { + features.features = dev->host_features[features.index]; + } else { + /* Return zeroes if the guest supports more feature bits. */ + features.features = 0; + } + stl_le_phys(ccw.cda, features.features); + sch->curr_status.scsw.count = ccw.count - sizeof(features); + ret = 0; + } + break; + case CCW_CMD_WRITE_FEAT: + if (check_len) { + if (ccw.count != sizeof(features)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(features)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + features.index = ldub_phys(ccw.cda + sizeof(features.features)); + features.features = ldl_le_phys(ccw.cda); + if (features.index < ARRAY_SIZE(dev->host_features)) { + if (dev->vdev->set_features) { + dev->vdev->set_features(dev->vdev, features.features); + } + dev->vdev->guest_features = features.features; + } else { + /* + * If the guest supports more feature bits, assert that it + * passes us zeroes for those we don't support. + */ + if (features.features) { + fprintf(stderr, "Guest bug: features[%i]=%x (expected 0)\n", + features.index, features.features); + /* XXX: do a unit check here? 
*/ + } + } + sch->curr_status.scsw.count = ccw.count - sizeof(features); + ret = 0; + } + break; + case CCW_CMD_READ_CONF: + if (check_len) { + if (ccw.count > dev->vdev->config_len) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, dev->vdev->config_len); + if (!ccw.cda) { + ret = -EFAULT; + } else { + dev->vdev->get_config(dev->vdev, dev->vdev->config); + /* XXX config space endianness */ + cpu_physical_memory_write(ccw.cda, dev->vdev->config, len); + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + } + break; + case CCW_CMD_WRITE_CONF: + if (check_len) { + if (ccw.count > dev->vdev->config_len) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, dev->vdev->config_len); + hw_len = len; + if (!ccw.cda) { + ret = -EFAULT; + } else { + config = cpu_physical_memory_map(ccw.cda, &hw_len, 0); + if (!config) { + ret = -EFAULT; + } else { + len = hw_len; + /* XXX config space endianness */ + memcpy(dev->vdev->config, config, len); + cpu_physical_memory_unmap(config, hw_len, 0, hw_len); + if (dev->vdev->set_config) { + dev->vdev->set_config(dev->vdev, dev->vdev->config); + } + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + } + } + break; + case CCW_CMD_WRITE_STATUS: + if (check_len) { + if (ccw.count != sizeof(status)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(status)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + status = ldub_phys(ccw.cda); + virtio_set_status(dev->vdev, status); + if (dev->vdev->status == 0) { + virtio_reset(dev->vdev); + } + sch->curr_status.scsw.count = ccw.count - sizeof(status); + ret = 0; + } + break; + case CCW_CMD_SET_IND: + if (check_len) { + if (ccw.count != sizeof(indicators)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(indicators)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + indicators = ldq_phys(ccw.cda); + if (!indicators) { + ret = -EFAULT; + } else { + dev->indicators = indicators; + sch->curr_status.scsw.count = ccw.count - sizeof(indicators); + ret = 0; + } + break; + case CCW_CMD_SET_CONF_IND: + if (check_len) { + if (ccw.count != sizeof(indicators)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(indicators)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + indicators = ldq_phys(ccw.cda); + if (!indicators) { + ret = -EFAULT; + } else { + dev->indicators2 = indicators; + sch->curr_status.scsw.count = ccw.count - sizeof(indicators); + ret = 0; + } + break; + case CCW_CMD_READ_VQ_CONF: + if (check_len) { + if (ccw.count != sizeof(vq_config)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(vq_config)) { + /* Can't execute command. 
*/ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + vq_config.index = lduw_phys(ccw.cda); + vq_config.num_max = virtio_queue_get_num(dev->vdev, + vq_config.index); + stw_phys(ccw.cda + sizeof(vq_config.index), vq_config.num_max); + sch->curr_status.scsw.count = ccw.count - sizeof(vq_config); + ret = 0; + } + break; + default: + ret = -ENOSYS; + break; + } + return ret; +} + +static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) +{ + unsigned int cssid = 0; + unsigned int ssid = 0; + unsigned int schid; + unsigned int devno; + bool have_devno = false; + bool found = false; + SubchDev *sch; + int ret; + int num; + DeviceState *parent = DEVICE(dev); + + sch = g_malloc0(sizeof(SubchDev)); + + sch->driver_data = dev; + dev->sch = sch; + + dev->vdev = vdev; + dev->indicators = 0; + + /* Initialize subchannel structure. */ + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; + /* + * Use a device number if provided. Otherwise, fall back to subchannel + * number. + */ + if (dev->bus_id) { + num = sscanf(dev->bus_id, "%x.%x.%04x", &cssid, &ssid, &devno); + if (num == 3) { + if ((cssid > MAX_CSSID) || (ssid > MAX_SSID)) { + ret = -EINVAL; + error_report("Invalid cssid or ssid: cssid %x, ssid %x", + cssid, ssid); + goto out_err; + } + /* Enforce use of virtual cssid. */ + if (cssid != VIRTUAL_CSSID) { + ret = -EINVAL; + error_report("cssid %x not valid for virtio devices", cssid); + goto out_err; + } + if (css_devno_used(cssid, ssid, devno)) { + ret = -EEXIST; + error_report("Device %x.%x.%04x already exists", cssid, ssid, + devno); + goto out_err; + } + sch->cssid = cssid; + sch->ssid = ssid; + sch->devno = devno; + have_devno = true; + } else { + ret = -EINVAL; + error_report("Malformed devno parameter '%s'", dev->bus_id); + goto out_err; + } + } + + /* Find the next free id. */ + if (have_devno) { + for (schid = 0; schid <= MAX_SCHID; schid++) { + if (!css_find_subch(1, cssid, ssid, schid)) { + sch->schid = schid; + css_subch_assign(cssid, ssid, schid, devno, sch); + found = true; + break; + } + } + if (!found) { + ret = -ENODEV; + error_report("No free subchannel found for %x.%x.%04x", cssid, ssid, + devno); + goto out_err; + } + trace_virtio_ccw_new_device(cssid, ssid, schid, devno, + "user-configured"); + } else { + cssid = VIRTUAL_CSSID; + for (ssid = 0; ssid <= MAX_SSID; ssid++) { + for (schid = 0; schid <= MAX_SCHID; schid++) { + if (!css_find_subch(1, cssid, ssid, schid)) { + sch->cssid = cssid; + sch->ssid = ssid; + sch->schid = schid; + devno = schid; + /* + * If the devno is already taken, look further in this + * subchannel set. + */ + while (css_devno_used(cssid, ssid, devno)) { + if (devno == MAX_SCHID) { + devno = 0; + } else if (devno == schid - 1) { + ret = -ENODEV; + error_report("No free devno found"); + goto out_err; + } else { + devno++; + } + } + sch->devno = devno; + css_subch_assign(cssid, ssid, schid, devno, sch); + found = true; + break; + } + } + if (found) { + break; + } + } + if (!found) { + ret = -ENODEV; + error_report("Virtual channel subsystem is full!"); + goto out_err; + } + trace_virtio_ccw_new_device(cssid, ssid, schid, devno, + "auto-configured"); + } + + /* Build initial schib. */ + css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE); + + sch->ccw_cb = virtio_ccw_cb; + + /* Build senseid data. 
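The control unit type marks the subchannel as a virtio-ccw device and the model carries the virtio device id, which is what the guest driver binds on.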
*/ + memset(&sch->id, 0, sizeof(SenseId)); + sch->id.reserved = 0xff; + sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->id.cu_model = dev->vdev->device_id; + + virtio_bind_device(vdev, &virtio_ccw_bindings, DEVICE(dev)); + /* Only the first 32 feature bits are used. */ + dev->host_features[0] = vdev->get_features(vdev, dev->host_features[0]); + dev->host_features[0] |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY; + dev->host_features[0] |= 0x1 << VIRTIO_F_BAD_FEATURE; + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, + parent->hotplugged, 1); + return 0; + +out_err: + dev->sch = NULL; + g_free(sch); + return ret; +} + +static int virtio_ccw_exit(VirtioCcwDevice *dev) +{ + SubchDev *sch = dev->sch; + + if (sch) { + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + g_free(sch); + } + dev->indicators = 0; + return 0; +} + +static int virtio_ccw_net_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_net_exit(VirtioCcwDevice *dev) +{ + virtio_net_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_blk_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_blk_init((DeviceState *)dev, &dev->blk); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_blk_exit(VirtioCcwDevice *dev) +{ + virtio_blk_exit(dev->vdev); + blockdev_mark_auto_del(dev->blk.conf.bs); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_serial_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_serial_init((DeviceState *)dev, &dev->serial); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_serial_exit(VirtioCcwDevice *dev) +{ + virtio_serial_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_balloon_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_balloon_init((DeviceState *)dev); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_balloon_exit(VirtioCcwDevice *dev) +{ + virtio_balloon_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_scsi_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_scsi_exit(VirtioCcwDevice *dev) +{ + virtio_scsi_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +/* DeviceState to VirtioCcwDevice. Note: used on datapath, + * be careful and test performance if you change this. 
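+ * (container_of() is plain pointer arithmetic, while the checked QOM cast would perform a type lookup on every notification.)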
+ */ +static inline VirtioCcwDevice *to_virtio_ccw_dev_fast(DeviceState *d) +{ + return container_of(d, VirtioCcwDevice, parent_obj); +} + +static void virtio_ccw_notify(DeviceState *d, uint16_t vector) +{ + VirtioCcwDevice *dev = to_virtio_ccw_dev_fast(d); + SubchDev *sch = dev->sch; + uint64_t indicators; + + if (vector >= 128) { + return; + } + + if (vector < VIRTIO_PCI_QUEUE_MAX) { + indicators = ldq_phys(dev->indicators); + indicators |= 1ULL << vector; + stq_phys(dev->indicators, indicators); + } else { + vector = 0; + indicators = ldq_phys(dev->indicators2); + indicators |= 1ULL << vector; + stq_phys(dev->indicators2, indicators); + } + + css_conditional_io_interrupt(sch); + +} + +static unsigned virtio_ccw_get_features(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + /* Only the first 32 feature bits are used. */ + return dev->host_features[0]; +} + +static void virtio_ccw_reset(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + virtio_reset(dev->vdev); + css_reset_sch(dev->sch); +} + +/**************** Virtio-ccw Bus Device Descriptions *******************/ + +static const VirtIOBindings virtio_ccw_bindings = { + .notify = virtio_ccw_notify, + .get_features = virtio_ccw_get_features, +}; + +static Property virtio_ccw_net_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_NET_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_NIC_PROPERTIES(VirtioCcwDevice, nic), + DEFINE_PROP_UINT32("x-txtimer", VirtioCcwDevice, + net.txtimer, TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtioCcwDevice, + net.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtioCcwDevice, net.tx), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_net_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_net_init; + k->exit = virtio_ccw_net_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_net_properties; +} + +static const TypeInfo virtio_ccw_net = { + .name = "virtio-net-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_net_class_init, +}; + +static Property virtio_ccw_blk_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_BLOCK_PROPERTIES(VirtioCcwDevice, blk.conf), + DEFINE_PROP_STRING("serial", VirtioCcwDevice, blk.serial), +#ifdef __linux__ + DEFINE_PROP_BIT("scsi", VirtioCcwDevice, blk.scsi, 0, true), +#endif + DEFINE_VIRTIO_BLK_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_blk_init; + k->exit = virtio_ccw_blk_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_blk_properties; +} + +static const TypeInfo virtio_ccw_blk = { + .name = "virtio-blk-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_blk_class_init, +}; + +static Property virtio_ccw_serial_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_PROP_UINT32("max_ports", VirtioCcwDevice, + serial.max_virtserial_ports, 31), + DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_serial_class_init(ObjectClass *klass, void *data) +{ + 
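/* Sketch of the indicator update performed by virtio_ccw_notify() above:
 * vectors below the queue limit set bit <vector> in the guest's primary
 * indicators; anything else is folded to bit 0 of the secondary
 * (configuration-change) indicators.  Plain variables stand in for the
 * guest memory the real code touches with ldq_phys()/stq_phys(), and
 * QUEUE_MAX is an assumed limit standing in for VIRTIO_PCI_QUEUE_MAX. */
#include <stdint.h>
#include <stdio.h>

#define QUEUE_MAX 64

static void notify(uint64_t *ind, uint64_t *ind2, unsigned int vector)
{
    if (vector >= 128) {
        return;                        /* out of range: drop */
    }
    if (vector < QUEUE_MAX) {
        *ind  |= 1ULL << vector;       /* queue notification */
    } else {
        *ind2 |= 1ULL << 0;            /* configuration change */
    }
}

int main(void)
{
    uint64_t ind = 0, ind2 = 0;

    notify(&ind, &ind2, 3);
    notify(&ind, &ind2, 127);
    printf("ind=%016llx ind2=%016llx\n",
           (unsigned long long)ind, (unsigned long long)ind2);
    return 0;
}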
DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_serial_init; + k->exit = virtio_ccw_serial_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_serial_properties; +} + +static const TypeInfo virtio_ccw_serial = { + .name = "virtio-serial-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_serial_class_init, +}; + +static Property virtio_ccw_balloon_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_balloon_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_balloon_init; + k->exit = virtio_ccw_balloon_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_balloon_properties; +} + +static const TypeInfo virtio_ccw_balloon = { + .name = "virtio-balloon-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_balloon_class_init, +}; + +static Property virtio_ccw_scsi_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_SCSI_PROPERTIES(VirtioCcwDevice, host_features[0], scsi), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_scsi_init; + k->exit = virtio_ccw_scsi_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_scsi_properties; +} + +static const TypeInfo virtio_ccw_scsi = { + .name = "virtio-scsi-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_scsi_class_init, +}; + +static int virtio_ccw_busdev_init(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev); + + virtio_ccw_bus_new(&_dev->bus, _dev); + + return _info->init(_dev); +} + +static int virtio_ccw_busdev_exit(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev); + + return _info->exit(_dev); +} + +static int virtio_ccw_busdev_unplug(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + SubchDev *sch = _dev->sch; + + /* + * We should arrive here only for device_del, since we don't support + * direct hot(un)plug of channels, but only through virtio. + */ + assert(sch != NULL); + /* Subchannel is now disabled and no longer valid. 
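/* The class_init functions above all follow one pattern: look up the
 * class structs and fill in the per-type hooks.  A QOM-free sketch of
 * the same idea: */
#include <stdio.h>

typedef struct DeviceClassSketch {
    int  (*init)(void);
    int  (*exit)(void);
    void (*reset)(void);
} DeviceClassSketch;

static int  serial_init_cb(void)  { puts("serial init"); return 0; }
static int  serial_exit_cb(void)  { puts("serial exit"); return 0; }
static void common_reset_cb(void) { puts("reset"); }

static void serial_class_init_sketch(DeviceClassSketch *k)
{
    k->init  = serial_init_cb;    /* type-specific hooks */
    k->exit  = serial_exit_cb;
    k->reset = common_reset_cb;   /* shared across all virtio-ccw types */
}

int main(void)
{
    DeviceClassSketch k;

    serial_class_init_sketch(&k);
    k.init();
    k.reset();
    k.exit();
    return 0;
}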
*/ + sch->curr_status.pmcw.flags &= ~(PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_DNV); + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, 1, 0); + + object_unparent(OBJECT(dev)); + qdev_free(dev); + return 0; +} + +static void virtio_ccw_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->init = virtio_ccw_busdev_init; + dc->exit = virtio_ccw_busdev_exit; + dc->unplug = virtio_ccw_busdev_unplug; + dc->bus_type = TYPE_VIRTUAL_CSS_BUS; + +} + +static const TypeInfo virtio_ccw_device_info = { + .name = TYPE_VIRTIO_CCW_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_device_class_init, + .class_size = sizeof(VirtIOCCWDeviceClass), + .abstract = true, +}; + +/***************** Virtual-css Bus Bridge Device ********************/ +/* Only required to have the virtio bus as child in the system bus */ + +static int virtual_css_bridge_init(SysBusDevice *dev) +{ + /* nothing */ + return 0; +} + +static void virtual_css_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = virtual_css_bridge_init; + dc->no_user = 1; +} + +static const TypeInfo virtual_css_bridge_info = { + .name = "virtual-css-bridge", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SysBusDevice), + .class_init = virtual_css_bridge_class_init, +}; + +/* virtio-ccw-bus */ + +void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev) +{ + DeviceState *qdev = DEVICE(dev); + BusState *qbus; + + qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_CCW_BUS, qdev, NULL); + qbus = BUS(bus); + qbus->allow_hotplug = 0; +} + +static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) +{ + VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); + BusClass *bus_class = BUS_CLASS(klass); + + bus_class->max_dev = 1; + k->notify = virtio_ccw_notify; + k->get_features = virtio_ccw_get_features; +} + +static const TypeInfo virtio_ccw_bus_info = { + .name = TYPE_VIRTIO_CCW_BUS, + .parent = TYPE_VIRTIO_BUS, + .instance_size = sizeof(VirtioCcwBusState), + .class_init = virtio_ccw_bus_class_init, +}; + +static void virtio_ccw_register(void) +{ + type_register_static(&virtio_ccw_bus_info); + type_register_static(&virtual_css_bus_info); + type_register_static(&virtio_ccw_device_info); + type_register_static(&virtio_ccw_serial); + type_register_static(&virtio_ccw_blk); + type_register_static(&virtio_ccw_net); + type_register_static(&virtio_ccw_balloon); + type_register_static(&virtio_ccw_scsi); + type_register_static(&virtual_css_bridge_info); +} + +type_init(virtio_ccw_register) diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h new file mode 100644 index 0000000..48474b3 --- /dev/null +++ b/hw/s390x/virtio-ccw.h @@ -0,0 +1,98 @@ +/* + * virtio ccw target definitions + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
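/* virtio_ccw_bus_new() above initializes a bus that is embedded in the
 * device structure rather than separately allocated.  Sketch of the
 * in-place pattern in plain C (qbus_create_inplace() is the real QOM
 * call): */
#include <stdio.h>
#include <string.h>

typedef struct BusSketch    { char name[32]; int allow_hotplug; } BusSketch;
typedef struct DeviceSketch { BusSketch bus; } DeviceSketch;

static void bus_create_inplace(BusSketch *bus, const char *type)
{
    memset(bus, 0, sizeof(*bus));
    snprintf(bus->name, sizeof(bus->name), "%s", type);
    bus->allow_hotplug = 0;   /* mirrors qbus->allow_hotplug = 0 above */
}

int main(void)
{
    DeviceSketch dev;

    bus_create_inplace(&dev.bus, "virtio-ccw-bus");
    printf("bus %s, hotplug=%d\n", dev.bus.name, dev.bus.allow_hotplug);
    return 0;
}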
+ */ + +#ifndef HW_S390X_VIRTIO_CCW_H +#define HW_S390X_VIRTIO_CCW_H + +#include <hw/virtio-blk.h> +#include <hw/virtio-net.h> +#include <hw/virtio-serial.h> +#include <hw/virtio-scsi.h> +#include <hw/virtio-bus.h> + +#define VIRTUAL_CSSID 0xfe + +#define VIRTIO_CCW_CU_TYPE 0x3832 +#define VIRTIO_CCW_CHPID_TYPE 0x32 + +#define CCW_CMD_SET_VQ 0x13 +#define CCW_CMD_VDEV_RESET 0x33 +#define CCW_CMD_READ_FEAT 0x12 +#define CCW_CMD_WRITE_FEAT 0x11 +#define CCW_CMD_READ_CONF 0x22 +#define CCW_CMD_WRITE_CONF 0x21 +#define CCW_CMD_WRITE_STATUS 0x31 +#define CCW_CMD_SET_IND 0x43 +#define CCW_CMD_SET_CONF_IND 0x53 +#define CCW_CMD_READ_VQ_CONF 0x32 + +#define TYPE_VIRTIO_CCW_DEVICE "virtio-ccw-device" +#define VIRTIO_CCW_DEVICE(obj) \ + OBJECT_CHECK(VirtioCcwDevice, (obj), TYPE_VIRTIO_CCW_DEVICE) +#define VIRTIO_CCW_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtIOCCWDeviceClass, (klass), TYPE_VIRTIO_CCW_DEVICE) +#define VIRTIO_CCW_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VirtIOCCWDeviceClass, (obj), TYPE_VIRTIO_CCW_DEVICE) + +typedef struct VirtioBusState VirtioCcwBusState; +typedef struct VirtioBusClass VirtioCcwBusClass; + +#define TYPE_VIRTIO_CCW_BUS "virtio-ccw-bus" +#define VIRTIO_CCW_BUS(obj) \ + OBJECT_CHECK(VirtioCcwBus, (obj), TYPE_VIRTIO_CCW_BUS) +#define VIRTIO_CCW_BUS_GET_CLASS(obj) \ + OBJECT_CHECK(VirtioCcwBusState, (obj), TYPE_VIRTIO_CCW_BUS) +#define VIRTIO_CCW_BUS_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtioCcwBusClass, klass, TYPE_VIRTIO_CCW_BUS) + +typedef struct VirtioCcwDevice VirtioCcwDevice; + +void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev); + +typedef struct VirtIOCCWDeviceClass { + DeviceClass parent_class; + int (*init)(VirtioCcwDevice *dev); + int (*exit)(VirtioCcwDevice *dev); +} VirtIOCCWDeviceClass; + +/* Change here if we want to support more feature bits. 
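/* The command codes defined above are dispatched in a switch from the
 * subchannel's ccw_cb hook (see the CCW_CMD_READ_VQ_CONF handler earlier
 * in this patch).  Condensed sketch of the dispatch shape, with
 * placeholder handler bodies: */
#include <errno.h>
#include <stdio.h>

#define CCW_CMD_SET_VQ       0x13
#define CCW_CMD_READ_VQ_CONF 0x32

static int handle_ccw(int cmd_code)
{
    int ret;

    switch (cmd_code) {
    case CCW_CMD_SET_VQ:
        ret = 0;               /* placeholder for the real handler */
        break;
    case CCW_CMD_READ_VQ_CONF:
        ret = 0;               /* placeholder */
        break;
    default:
        ret = -ENOSYS;         /* unknown command, as in the patch */
        break;
    }
    return ret;
}

int main(void)
{
    printf("%d %d\n", handle_ccw(CCW_CMD_SET_VQ), handle_ccw(0x99));
    return 0;
}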
*/ +#define VIRTIO_CCW_FEATURE_SIZE 1 + +struct VirtioCcwDevice { + DeviceState parent_obj; + SubchDev *sch; + VirtIODevice *vdev; + char *bus_id; + VirtIOBlkConf blk; + NICConf nic; + uint32_t host_features[VIRTIO_CCW_FEATURE_SIZE]; + virtio_serial_conf serial; + virtio_net_conf net; + VirtIOSCSIConf scsi; + VirtioBusState bus; + /* Guest provided values: */ + hwaddr indicators; + hwaddr indicators2; +}; + +/* virtual css bus type */ +typedef struct VirtualCssBus { + BusState parent_obj; +} VirtualCssBus; + +#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus" +#define VIRTUAL_CSS_BUS(obj) \ + OBJECT_CHECK(VirtualCssBus, (obj), TYPE_VIRTUAL_CSS_BUS) + +VirtualCssBus *virtual_css_bus_init(void); +void virtio_ccw_device_update_status(SubchDev *sch); +VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch); +#endif @@ -1021,6 +1021,7 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, hwdef->ecc_version); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -1665,6 +1666,7 @@ static void sun4d_hw_init(const struct sun4d_hwdef *hwdef, ram_addr_t RAM_size, "Sun4d"); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -1865,6 +1867,7 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size, "Sun4c"); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -878,6 +878,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, (uint8_t *)&nd_table[0].macaddr); fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index 3040bc6..c0a7902 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -14,6 +14,7 @@ */ #include "qemu/iov.h" +#include "qemu/timer.h" #include "qemu-common.h" #include "virtio.h" #include "pc.h" @@ -22,6 +23,7 @@ #include "virtio-balloon.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" +#include "qapi/visitor.h" #if defined(__linux__) #include <sys/mman.h> @@ -36,6 +38,9 @@ typedef struct VirtIOBalloon uint64_t stats[VIRTIO_BALLOON_S_NR]; VirtQueueElement stats_vq_elem; size_t stats_vq_offset; + QEMUTimer *stats_timer; + int64_t stats_last_update; + int64_t stats_poll_interval; DeviceState *qdev; } VirtIOBalloon; @@ -53,6 +58,16 @@ static void balloon_page(void *addr, int deflate) #endif } +static const char *balloon_stat_names[] = { + [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in", + [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out", + [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults", + [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults", + [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", + [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", + [VIRTIO_BALLOON_S_NR] = NULL +}; + /* 
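/* The balloon_stat_names table above uses C99 designated initializers
 * keyed by the stat enum, with a NULL sentinel at VIRTIO_BALLOON_S_NR so
 * callers can iterate without knowing the count.  Pattern sketch with
 * made-up indices: */
#include <stdio.h>

enum { STAT_SWAP_IN, STAT_SWAP_OUT, STAT_NR };

static const char *stat_names[] = {
    [STAT_SWAP_IN]  = "stat-swap-in",
    [STAT_SWAP_OUT] = "stat-swap-out",
    [STAT_NR]       = NULL,       /* sentinel */
};

int main(void)
{
    for (int i = 0; stat_names[i] != NULL; i++) {
        printf("%d -> %s\n", i, stat_names[i]);
    }
    return 0;
}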
* reset_stats - Mark all items in the stats array as unset * @@ -67,6 +82,118 @@ static inline void reset_stats(VirtIOBalloon *dev) for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); } +static bool balloon_stats_supported(const VirtIOBalloon *s) +{ + return s->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ); +} + +static bool balloon_stats_enabled(const VirtIOBalloon *s) +{ + return s->stats_poll_interval > 0; +} + +static void balloon_stats_destroy_timer(VirtIOBalloon *s) +{ + if (balloon_stats_enabled(s)) { + qemu_del_timer(s->stats_timer); + qemu_free_timer(s->stats_timer); + s->stats_timer = NULL; + s->stats_poll_interval = 0; + } +} + +static void balloon_stats_change_timer(VirtIOBalloon *s, int secs) +{ + qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000); +} + +static void balloon_stats_poll_cb(void *opaque) +{ + VirtIOBalloon *s = opaque; + + if (!balloon_stats_supported(s)) { + /* re-schedule */ + balloon_stats_change_timer(s, s->stats_poll_interval); + return; + } + + virtqueue_push(s->svq, &s->stats_vq_elem, s->stats_vq_offset); + virtio_notify(&s->vdev, s->svq); +} + +static void balloon_stats_get_all(Object *obj, struct Visitor *v, + void *opaque, const char *name, Error **errp) +{ + VirtIOBalloon *s = opaque; + int i; + + if (!s->stats_last_update) { + error_setg(errp, "guest hasn't updated any stats yet"); + return; + } + + visit_start_struct(v, NULL, "guest-stats", name, 0, errp); + visit_type_int(v, &s->stats_last_update, "last-update", errp); + + visit_start_struct(v, NULL, NULL, "stats", 0, errp); + for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) { + visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i], + errp); + } + visit_end_struct(v, errp); + + visit_end_struct(v, errp); +} + +static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + VirtIOBalloon *s = opaque; + visit_type_int(v, &s->stats_poll_interval, name, errp); +} + +static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + VirtIOBalloon *s = opaque; + int64_t value; + + visit_type_int(v, &value, name, errp); + if (error_is_set(errp)) { + return; + } + + if (value < 0) { + error_setg(errp, "timer value must be greater than zero"); + return; + } + + if (value == s->stats_poll_interval) { + return; + } + + if (value == 0) { + /* timer=0 disables the timer */ + balloon_stats_destroy_timer(s); + return; + } + + if (balloon_stats_enabled(s)) { + /* timer interval change */ + s->stats_poll_interval = value; + balloon_stats_change_timer(s, value); + return; + } + + /* create a new timer */ + g_assert(s->stats_timer == NULL); + s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s); + s->stats_poll_interval = value; + balloon_stats_change_timer(s, 0); +} + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = to_virtio_balloon(vdev); @@ -107,9 +234,10 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) VirtQueueElement *elem = &s->stats_vq_elem; VirtIOBalloonStat stat; size_t offset = 0; + qemu_timeval tv; if (!virtqueue_pop(vq, elem)) { - return; + goto out; } /* Initialize the stats to get rid of any stale values. 
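/* Condensed sketch of the poll-interval setter above: negative values
 * are rejected, 0 tears the timer down, a change while running re-arms
 * it, and the first non-zero value creates the timer.  The QEMU timer
 * is faked with two variables. */
#include <stdbool.h>
#include <stdio.h>

static bool timer_created;
static long poll_interval;

static int set_poll_interval(long value)
{
    if (value < 0) {
        return -1;                  /* rejected */
    }
    if (value == poll_interval) {
        return 0;                   /* no change */
    }
    if (value == 0) {               /* disable: destroy the timer */
        timer_created = false;
        poll_interval = 0;
        return 0;
    }
    if (poll_interval > 0) {        /* interval change: just re-arm */
        poll_interval = value;
        return 0;
    }
    timer_created = true;           /* first enable: create the timer */
    poll_interval = value;
    return 0;
}

int main(void)
{
    set_poll_interval(2);
    set_poll_interval(10);
    set_poll_interval(0);
    printf("created=%d interval=%ld\n", timer_created, poll_interval);
    return 0;
}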
This is only @@ -128,6 +256,18 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) s->stats[tag] = val; } s->stats_vq_offset = offset; + + if (qemu_gettimeofday(&tv) < 0) { + fprintf(stderr, "warning: %s: failed to get time of day\n", __func__); + goto out; + } + + s->stats_last_update = tv.tv_sec; + +out: + if (balloon_stats_enabled(s)) { + balloon_stats_change_timer(s, s->stats_poll_interval); + } } static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) @@ -164,28 +304,6 @@ static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f) static void virtio_balloon_stat(void *opaque, BalloonInfo *info) { VirtIOBalloon *dev = opaque; - -#if 0 - /* Disable guest-provided stats for now. For more details please check: - * https://bugzilla.redhat.com/show_bug.cgi?id=623903 - * - * If you do enable it (which is probably not going to happen as we - * need a new command for it), remember that you also need to fill the - * appropriate members of the BalloonInfo structure so that the stats - * are returned to the client. - */ - if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) { - virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset); - virtio_notify(&dev->vdev, dev->svq); - return; - } -#endif - - /* Stats are not supported. Clear out any stale values that might - * have been set by a more featureful guest kernel. - */ - reset_stats(dev); - info->actual = ram_size - ((uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT); } @@ -255,12 +373,18 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev) s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats); - reset_stats(s); - s->qdev = dev; register_savevm(dev, "virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s); + object_property_add(OBJECT(dev), "guest-stats", "guest statistics", + balloon_stats_get_all, NULL, NULL, s, NULL); + + object_property_add(OBJECT(dev), "guest-stats-polling-interval", "int", + balloon_stats_get_poll_interval, + balloon_stats_set_poll_interval, + NULL, s, NULL); + return &s->vdev; } @@ -268,6 +392,7 @@ void virtio_balloon_exit(VirtIODevice *vdev) { VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev); + balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); unregister_savevm(s->qdev, "virtio-balloon", s); virtio_cleanup(vdev); diff --git a/hw/xilinx_ethlite.c b/hw/xilinx_ethlite.c index 2254851..11dfbc3 100644 --- a/hw/xilinx_ethlite.c +++ b/hw/xilinx_ethlite.c @@ -89,7 +89,7 @@ eth_read(void *opaque, hwaddr addr, unsigned int size) case R_RX_CTRL1: case R_RX_CTRL0: r = s->regs[addr]; - D(qemu_log("%s %x=%x\n", __func__, addr * 4, r)); + D(qemu_log("%s " TARGET_FMT_plx "=%x\n", __func__, addr * 4, r)); break; default: @@ -115,7 +115,8 @@ eth_write(void *opaque, hwaddr addr, if (addr == R_TX_CTRL1) base = 0x800 / 4; - D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value)); + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); if ((value & (CTRL_P | CTRL_S)) == CTRL_S) { qemu_send_packet(&s->nic->nc, (void *) &s->regs[base], @@ -135,12 +136,16 @@ eth_write(void *opaque, hwaddr addr, break; /* Keep these native. 
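/* The receive_stats change above reworks every early return into a goto
 * so the polling timer is always re-armed, whether or not the guest had
 * queued a stats element.  Shape of the pattern: */
#include <stdbool.h>
#include <stdio.h>

static bool stats_enabled = true;

static void rearm_timer(void) { puts("timer re-armed"); }

static void receive_stats(bool have_elem, bool have_time)
{
    if (!have_elem) {
        goto out;                             /* nothing queued */
    }
    if (!have_time) {
        fprintf(stderr, "warning: failed to get time of day\n");
        goto out;
    }
    puts("stats timestamp recorded");
out:
    if (stats_enabled) {
        rearm_timer();                        /* on every exit path */
    }
}

int main(void)
{
    receive_stats(true, true);
    receive_stats(false, true);
    return 0;
}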
*/ + case R_RX_CTRL0: + case R_RX_CTRL1: + if (!(value & CTRL_S)) { + qemu_flush_queued_packets(&s->nic->nc); + } case R_TX_LEN0: case R_TX_LEN1: case R_TX_GIE0: - case R_RX_CTRL0: - case R_RX_CTRL1: - D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value)); + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); s->regs[addr] = value; break; @@ -163,9 +168,9 @@ static const MemoryRegionOps eth_ops = { static int eth_can_rx(NetClientState *nc) { struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque; - int r; - r = !(s->regs[R_RX_CTRL0] & CTRL_S); - return r; + unsigned int rxbase = s->rxbuf * (0x800 / 4); + + return !(s->regs[rxbase + R_RX_CTRL0] & CTRL_S); } static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) @@ -182,7 +187,7 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } - D(qemu_log("%s %d rxbase=%x\n", __func__, size, rxbase)); + D(qemu_log("%s %zd rxbase=%x\n", __func__, size, rxbase)); memcpy(&s->regs[rxbase + R_RX_BUF0], buf, size); s->regs[rxbase + R_RX_CTRL0] |= CTRL_S; diff --git a/include/block/block.h b/include/block/block.h index ffd1936..5c3b911 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -309,6 +309,10 @@ int bdrv_get_flags(BlockDriverState *bs); int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, @@ -351,13 +355,12 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); -#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048 - -void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable); +struct HBitmapIter; +void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity); int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector); +void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi); int64_t bdrv_get_dirty_count(BlockDriverState *bs); void bdrv_enable_copy_on_read(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index f83ffb8..f7279b9 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -32,6 +32,7 @@ #include "qapi-types.h" #include "qapi/qmp/qerror.h" #include "monitor/monitor.h" +#include "qemu/hbitmap.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -275,8 +276,7 @@ struct BlockDriverState { bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; - unsigned long *dirty_bitmap; - int64_t dirty_count; + HBitmap *dirty_bitmap; int in_use; /* users other than guest access, eg. block migration */ QTAILQ_ENTRY(BlockDriverState) list; @@ -344,6 +344,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs: Block device to operate on. * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. 
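/* Sketch of the eth_can_rx() fix above: the ethlite core ping-pongs
 * between two receive buffers spaced 0x800 bytes apart, so the status
 * bit must be tested in the currently active buffer instead of always
 * in buffer 0.  The register index and control bit are illustrative,
 * not the device's real layout. */
#include <stdio.h>

#define R_RX_CTRL0 7          /* illustrative register index */
#define CTRL_S     (1 << 0)   /* "buffer full" status bit */

static unsigned int regs[2 * (0x800 / 4)];
static int rxbuf;             /* which buffer the device fills next */

static int can_rx(void)
{
    unsigned int rxbase = rxbuf * (0x800 / 4);

    return !(regs[rxbase + R_RX_CTRL0] & CTRL_S);
}

int main(void)
{
    regs[R_RX_CTRL0] = CTRL_S;          /* buffer 0 is full */
    rxbuf = 1;                          /* but buffer 1 is active */
    printf("can_rx=%d\n", can_rx());    /* 1: buffer 1 is still free */
    return 0;
}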
+ * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. @@ -357,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/include/qemu-common.h b/include/qemu-common.h index ca464bb..af2379f 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -68,6 +68,9 @@ #if !defined(ECANCELED) #define ECANCELED 4097 #endif +#if !defined(EMEDIUMTYPE) +#define EMEDIUMTYPE 4098 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h new file mode 100644 index 0000000..73f5d1d --- /dev/null +++ b/include/qemu/hbitmap.h @@ -0,0 +1,208 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef HBITMAP_H +#define HBITMAP_H 1 + +#include <limits.h> +#include <stdint.h> +#include <stdbool.h> +#include "bitops.h" + +typedef struct HBitmap HBitmap; +typedef struct HBitmapIter HBitmapIter; + +#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6) + +/* For 32-bit, the largest that fits in a 4 GiB address space. + * For 64-bit, the number of sectors in 1 PiB. Good luck, in + * either case... :) + */ +#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41) + +/* We need to place a sentinel in level 0 to speed up iteration. Thus, + * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The + * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE + * is an exact multiple of BITS_PER_LEVEL. + */ +#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1) + +struct HBitmapIter { + const HBitmap *hb; + + /* Copied from hb for access in the inline functions (hb is opaque). */ + int granularity; + + /* Entry offset into the last-level array of longs. */ + size_t pos; + + /* The currently-active path in the tree. Each item of cur[i] stores + * the bits (i.e. the subtrees) yet to be processed under that node. + */ + unsigned long cur[HBITMAP_LEVELS]; +}; + +/** + * hbitmap_alloc: + * @size: Number of bits in the bitmap. + * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity + * bits will be represented by a single bit. Each operation on a + * range of bits first rounds the bits to determine which group they land + * in, and then affect the entire set; iteration will only visit the first + * bit of each group. + * + * Allocate a new HBitmap. + */ +HBitmap *hbitmap_alloc(uint64_t size, int granularity); + +/** + * hbitmap_empty: + * @hb: HBitmap to operate on. + * + * Return whether the bitmap is empty. + */ +bool hbitmap_empty(const HBitmap *hb); + +/** + * hbitmap_granularity: + * @hb: HBitmap to operate on. + * + * Return the granularity of the HBitmap. 
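/* The hbitmap level constants above, in numbers: each tree level
 * consumes BITS_PER_LEVEL bits of the bit index (5 on 32-bit hosts, 6
 * on 64-bit), and one extra level always exists to hold the iteration
 * sentinel.  This prints the derived values for the current host: */
#include <limits.h>
#include <stdio.h>

#define SK_BITS_PER_LEVEL (ULONG_MAX == 0xffffffffUL ? 5 : 6)
#define SK_LOG_MAX_SIZE   (ULONG_MAX == 0xffffffffUL ? 34 : 41)
#define SK_LEVELS         ((SK_LOG_MAX_SIZE / SK_BITS_PER_LEVEL) + 1)

int main(void)
{
    printf("bits per level: %d\n", SK_BITS_PER_LEVEL);
    printf("log2 max size:  %d\n", SK_LOG_MAX_SIZE);
    printf("levels:         %d\n", SK_LEVELS);
    return 0;
}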
+ */ +int hbitmap_granularity(const HBitmap *hb); + +/** + * hbitmap_count: + * @hb: HBitmap to operate on. + * + * Return the number of bits set in the HBitmap. + */ +uint64_t hbitmap_count(const HBitmap *hb); + +/** + * hbitmap_set: + * @hb: HBitmap to operate on. + * @start: First bit to set (0-based). + * @count: Number of bits to set. + * + * Set a consecutive range of bits in an HBitmap. + */ +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_reset: + * @hb: HBitmap to operate on. + * @start: First bit to reset (0-based). + * @count: Number of bits to reset. + * + * Reset a consecutive range of bits in an HBitmap. + */ +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_get: + * @hb: HBitmap to operate on. + * @item: Bit to query (0-based). + * + * Return whether the @item-th bit in an HBitmap is set. + */ +bool hbitmap_get(const HBitmap *hb, uint64_t item); + +/** + * hbitmap_free: + * @hb: HBitmap to operate on. + * + * Free an HBitmap and all of its associated memory. + */ +void hbitmap_free(HBitmap *hb); + +/** + * hbitmap_iter_init: + * @hbi: HBitmapIter to initialize. + * @hb: HBitmap to iterate on. + * @first: First bit to visit (0-based, must be strictly less than the + * size of the bitmap). + * + * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return + * the lowest-numbered bit that is set in @hb, starting at @first. + * + * Concurrent setting of bits is acceptable, and will at worst cause the + * iteration to miss some of those bits. Resetting bits before the current + * position of the iterator is also okay. However, concurrent resetting of + * bits can lead to unexpected behavior if the iterator has not yet reached + * those bits. + */ +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); + +/* hbitmap_iter_skip_words: + * @hbi: HBitmapIter to operate on. + * + * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word. + */ +unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi); + +/** + * hbitmap_iter_next: + * @hbi: HBitmapIter to operate on. + * + * Return the next bit that is set in @hbi's associated HBitmap, + * or -1 if all remaining bits are zero. + */ +static inline int64_t hbitmap_iter_next(HBitmapIter *hbi) +{ + unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1]; + int64_t item; + + if (cur == 0) { + cur = hbitmap_iter_skip_words(hbi); + if (cur == 0) { + return -1; + } + } + + /* The next call will resume work from the next bit. */ + hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ffsl(cur) - 1; + + return item << hbi->granularity; +} + +/** + * hbitmap_iter_next_word: + * @hbi: HBitmapIter to operate on. + * @p_cur: Location where to store the next non-zero word. + * + * Return the index of the next nonzero word that is set in @hbi's + * associated HBitmap, and set *p_cur to the content of that word + * (bits before the index that was passed to hbitmap_iter_init are + * trimmed on the first call). Return -1, and set *p_cur to zero, + * if all remaining words are zero. + */ +static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur) +{ + unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1]; + + if (cur == 0) { + cur = hbitmap_iter_skip_words(hbi); + if (cur == 0) { + *p_cur = 0; + return -1; + } + } + + /* The next call will resume work from the next word. 
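/* The granularity rule documented above, in arithmetic: with
 * granularity g, bit operations affect whole groups of 2^g bits, and
 * the iterator reports only the first bit of each dirty group. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int g = 3;                          /* 8 bits per group */
    uint64_t item = 21;                 /* bit the caller sets */

    uint64_t group = item >> g;         /* bit actually stored: group 2 */
    uint64_t first = group << g;        /* what iteration yields: 16 */
    printf("item %llu -> group %llu, iterator yields %llu\n",
           (unsigned long long)item, (unsigned long long)group,
           (unsigned long long)first);
    return 0;
}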
*/ + hbi->cur[HBITMAP_LEVELS - 1] = 0; + *p_cur = cur; + return hbi->pos; +} + + +#endif diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 81c9a75..2a32be4 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -26,6 +26,7 @@ #define HOST_UTILS_H 1 #include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */ +#include <string.h> /* ffsl */ #if defined(__x86_64__) #define __HAVE_FAST_MULU64__ @@ -237,4 +238,29 @@ static inline int ctpop64(uint64_t val) #endif } +/* glibc does not provide an inline version of ffsl, so always define + * ours. We need to give it a different name, however. + */ +#ifdef __GLIBC__ +#define ffsl qemu_ffsl +#endif +static inline int ffsl(long val) +{ + if (!val) { + return 0; + } + +#if QEMU_GNUC_PREREQ(3, 4) + return __builtin_ctzl(val) + 1; +#else + if (sizeof(long) == 4) { + return ctz32(val) + 1; + } else if (sizeof(long) == 8) { + return ctz64(val) + 1; + } else { + abort(); + } +#endif +} + #endif diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 773caf9..46f2247 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -40,6 +40,8 @@ typedef struct CPUState CPUState; /** * CPUClass: + * @class_by_name: Callback to map -cpu command line model name to an + * instantiatable CPU type. * @reset: Callback to reset the #CPUState to its initial state. * * Represents a CPU family or model. @@ -49,6 +51,8 @@ typedef struct CPUClass { DeviceClass parent_class; /*< public >*/ + ObjectClass *(*class_by_name)(const char *cpu_model); + void (*reset)(CPUState *cpu); } CPUClass; @@ -89,10 +93,8 @@ struct CPUState { bool stop; bool stopped; -#if !defined(CONFIG_USER_ONLY) int kvm_fd; bool kvm_vcpu_dirty; -#endif struct KVMState *kvm_state; struct kvm_run *kvm_run; @@ -108,6 +110,17 @@ struct CPUState { void cpu_reset(CPUState *cpu); /** + * cpu_class_by_name: + * @typename: The CPU base type. + * @cpu_model: The model string without any parameters. + * + * Looks up a CPU #ObjectClass matching name @cpu_model. + * + * Returns: A #CPUClass or %NULL if not matching class is found. + */ +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model); + +/** * qemu_cpu_has_work: * @cpu: The vCPU to check. * diff --git a/include/qom/object.h b/include/qom/object.h index 8e16ea8..48e80ba 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -691,6 +691,14 @@ ObjectClass *object_class_get_parent(ObjectClass *klass); const char *object_class_get_name(ObjectClass *klass); /** + * object_class_is_abstract: + * @klass: The class to obtain the abstractness for. + * + * Returns: %true if @klass is abstract, %false otherwise. + */ +bool object_class_is_abstract(ObjectClass *klass); + +/** * object_class_by_name: * @typename: The QOM typename to obtain the class for. 
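/* The ffsl() fallback above returns the 1-based position of the least
 * significant set bit, or 0 when no bit is set.  Quick standalone check
 * of those semantics (the __builtin_ctzl() path shown here matches the
 * QEMU_GNUC_PREREQ branch): */
#include <stdio.h>

static int ffsl_sketch(long val)
{
    if (!val) {
        return 0;
    }
    return __builtin_ctzl(val) + 1;
}

int main(void)
{
    printf("%d %d %d\n", ffsl_sketch(0), ffsl_sketch(1), ffsl_sketch(0x40));
    /* prints: 0 1 7 */
    return 0;
}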
* diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 81bd817..f7f6854 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -13,9 +13,16 @@ void cpu_synchronize_all_post_init(void); void qtest_clock_warp(int64_t dest); +#ifndef CONFIG_USER_ONLY /* vl.c */ extern int smp_cores; extern int smp_threads; +#else +/* *-user doesn't have configurable SMP topology */ +#define smp_cores 1 +#define smp_threads 1 +#endif + void set_numa_modes(void); void set_cpu_log(const char *optarg); void set_cpu_log_filename(const char *optarg); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6bdd513..6e6dfb3 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -36,6 +36,7 @@ #define KVM_FEATURE_ASYNC_PF 0 #define KVM_FEATURE_STEAL_TIME 0 #define KVM_FEATURE_PV_EOI 0 +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0 #endif extern int kvm_allowed; @@ -158,7 +159,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap); int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset); #endif -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr); +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_on_sigbus(int code, void *addr); /* internal API */ @@ -195,6 +196,9 @@ int kvm_arch_init(KVMState *s); int kvm_arch_init_vcpu(CPUState *cpu); +/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu); + void kvm_arch_reset_vcpu(CPUState *cpu); int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); @@ -222,7 +222,7 @@ int kvm_init_vcpu(CPUState *cpu) DPRINTF("kvm_init_vcpu\n"); - ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index); + ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu)); if (ret < 0) { DPRINTF("kvm_create_vcpu failed\n"); goto err; @@ -2026,9 +2026,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) return 0; } -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr) +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - CPUState *cpu = ENV_GET_CPU(env); return kvm_arch_on_sigbus_vcpu(cpu, code, addr); } @@ -112,7 +112,7 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint return -ENOSYS; } -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr) +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return 1; } diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..3a4817b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -325,6 +325,80 @@ { 'command': 'query-chardev', 'returns': ['ChardevInfo'] } ## +# @DataFormat: +# +# An enumeration of data format. +# +# @utf8: The data format is 'utf8'. +# +# @base64: The data format is 'base64'. +# +# Since: 1.4 +## +{ 'enum': 'DataFormat' + 'data': [ 'utf8', 'base64' ] } + +## +# @memchar-write: +# +# Provide writing interface for memchardev. Write data to char +# device 'memory'. +# +# @device: the name of the memory char device. +# +# @size: the size to write in bytes. +# +# @data: the source data write to memchar. +# +# @format: #optional the format of the data write to chardev 'memory', +# by default is 'utf8'. +# +# Returns: Nothing on success +# If @device is not a valid char device, DeviceNotFound +# +# Since: 1.4 +## +{ 'command': 'memchar-write', + 'data': {'device': 'str', 'size': 'int', 'data': 'str', + '*format': 'DataFormat'} } + +## +# @MemCharRead +# +# Result of QMP command memchar-read. +# +# @data: The data read from memchar as string. 
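The new kvm_arch_vcpu_id() hook declared in sysemu/kvm.h above lets each
target choose the id handed to the KVM_CREATE_VCPU ioctl, instead of
hard-wiring cpu_index.  A minimal sketch, assuming a target that simply
reuses the linear CPU index; an architecture could instead return, say,
an APIC id:

#include <stdio.h>

typedef struct CPUStateSketch { int cpu_index; } CPUStateSketch;

static unsigned long kvm_arch_vcpu_id_sketch(CPUStateSketch *cpu)
{
    return cpu->cpu_index;     /* assumed default; arch-specific in QEMU */
}

int main(void)
{
    CPUStateSketch cpu = { .cpu_index = 2 };

    printf("KVM_CREATE_VCPU id = %lu\n", kvm_arch_vcpu_id_sketch(&cpu));
    return 0;
}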
+# +# @count: The numbers of bytes read from. +# +# Since: 1.4 +## +{ 'type': 'MemCharRead', + 'data': { 'data': 'str', 'count': 'int' } } + +## +# @memchar-read: +# +# Provide read interface for memchardev. Read from the char +# device 'memory' and return the data. +# +# @device: the name of the memory char device. +# +# @size: the size to read in bytes. +# +# @format: #optional the format of the data want to read from +# memchardev, by default is 'utf8'. +# +# Returns: @MemCharRead +# If @device is not a valid memchr device, DeviceNotFound +# +# Since: 1.4 +## +{ 'command': 'memchar-read', + 'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'}, + 'returns': 'MemCharRead' } + +## # @CommandInfo: # # Information about a QMP command @@ -667,10 +741,12 @@ # # @count: number of dirty bytes according to the dirty bitmap # +# @granularity: granularity of the dirty bitmap in bytes (since 1.4) +# # Since: 1.3 ## { 'type': 'BlockDirtyInfo', - 'data': {'count': 'int'} } + 'data': {'count': 'int', 'granularity': 'int'} } ## # @BlockInfo: @@ -977,28 +1053,10 @@ # # @actual: the number of bytes the balloon currently contains # -# @mem_swapped_in: #optional number of pages swapped in within the guest -# -# @mem_swapped_out: #optional number of pages swapped out within the guest -# -# @major_page_faults: #optional number of major page faults within the guest -# -# @minor_page_faults: #optional number of minor page faults within the guest -# -# @free_mem: #optional amount of memory (in bytes) free in the guest -# -# @total_mem: #optional amount of memory (in bytes) visible to the guest -# # Since: 0.14.0 # -# Notes: all current versions of QEMU do not fill out optional information in -# this structure. ## -{ 'type': 'BalloonInfo', - 'data': {'actual': 'int', '*mem_swapped_in': 'int', - '*mem_swapped_out': 'int', '*major_page_faults': 'int', - '*minor_page_faults': 'int', '*free_mem': 'int', - '*total_mem': 'int'} } +{ 'type': 'BalloonInfo', 'data': {'actual': 'int' } } ## # @query-balloon: @@ -1634,6 +1692,14 @@ # (all the disk, only the sectors allocated in the topmost image, or # only new I/O). # +# @granularity: #optional granularity of the dirty bitmap, default is 64K +# if the image format doesn't have clusters, 4K if the clusters +# are smaller than that, else the cluster size. Must be a +# power of 2 between 512 and 64M (since 1.4). +# +# @buf-size: #optional maximum amount of data in flight from source to +# target (since 1.4). +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). 
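The default-granularity rule for drive-mirror spelled out here (and in
the qmp-commands.hx text later in this patch, which states the clamp
explicitly) reduces to a small computation.  A sketch, assuming
cluster_size == 0 means the format defines no clusters:

#include <stdint.h>
#include <stdio.h>

static uint32_t default_granularity(uint32_t cluster_size)
{
    if (cluster_size == 0) {
        return 65536;              /* no cluster size: 64K default */
    }
    if (cluster_size < 4096) {
        return 4096;               /* tiny clusters: clamp up to 4K */
    }
    if (cluster_size > 65536) {
        return 65536;              /* huge clusters: clamp down to 64K */
    }
    return cluster_size;
}

int main(void)
{
    printf("%u %u %u\n",
           default_granularity(0),                  /* 65536 */
           default_granularity(512),                /* 4096 */
           default_granularity(2 * 1024 * 1024));   /* 65536 */
    return 0;
}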
@@ -1650,7 +1716,8 @@ { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*speed': 'int', '*granularity': 'uint32', + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## diff --git a/qemu-char.c b/qemu-char.c index da1db1d..ac5d62d 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -98,6 +98,7 @@ #include "ui/qemu-spice.h" #define READ_BUF_LEN 4096 +#define CBUFF_SIZE 65536 /***********************************************************/ /* character device */ @@ -2643,6 +2644,199 @@ size_t qemu_chr_mem_osize(const CharDriverState *chr) return d->outbuf_size; } +/*********************************************************/ +/*CircularMemory chardev*/ + +typedef struct { + size_t size; + size_t prod; + size_t cons; + uint8_t *cbuf; +} CirMemCharDriver; + +static bool cirmem_chr_is_empty(const CharDriverState *chr) +{ + const CirMemCharDriver *d = chr->opaque; + + return d->cons == d->prod; +} + +static size_t qemu_chr_cirmem_count(const CharDriverState *chr) +{ + const CirMemCharDriver *d = chr->opaque; + + return (d->prod - d->cons); +} + +static int cirmem_chr_write(CharDriverState *chr, const uint8_t *buf, int len) +{ + CirMemCharDriver *d = chr->opaque; + int i; + + if (!buf || (len < 0)) { + return -1; + } + + for (i = 0; i < len; i++ ) { + /* Avoid writing the IAC information to the queue. */ + if ((unsigned char)buf[i] == IAC) { + continue; + } + + d->cbuf[d->prod++ % d->size] = buf[i]; + if ((d->prod - d->cons) > d->size) { + d->cons = d->prod - d->size; + } + } + + return 0; +} + +static int cirmem_chr_read(CharDriverState *chr, uint8_t *buf, int len) +{ + CirMemCharDriver *d = chr->opaque; + int i; + + for (i = 0; i < len && !cirmem_chr_is_empty(chr); i++) { + buf[i] = d->cbuf[d->cons++ % d->size]; + } + + return i; +} + +static void cirmem_chr_close(struct CharDriverState *chr) +{ + CirMemCharDriver *d = chr->opaque; + + g_free(d->cbuf); + g_free(d); + chr->opaque = NULL; +} + +static CharDriverState *qemu_chr_open_cirmemchr(QemuOpts *opts) +{ + CharDriverState *chr; + CirMemCharDriver *d; + + chr = g_malloc0(sizeof(CharDriverState)); + d = g_malloc(sizeof(*d)); + + d->size = qemu_opt_get_number(opts, "maxcapacity", 0); + if (d->size == 0) { + d->size = CBUFF_SIZE; + } + + /* The size must be power of 2 */ + if (d->size & (d->size - 1)) { + fprintf(stderr, "chardev: size of memory device must be power of 2\n"); + goto fail; + } + + d->prod = 0; + d->cons = 0; + d->cbuf = g_malloc0(d->size); + + chr->opaque = d; + chr->chr_write = cirmem_chr_write; + chr->chr_close = cirmem_chr_close; + + return chr; + +fail: + g_free(d); + g_free(chr); + return NULL; +} + +static bool qemu_is_chr(const CharDriverState *chr, const char *filename) +{ + return strcmp(chr->filename, filename); +} + +void qmp_memchar_write(const char *device, int64_t size, + const char *data, bool has_format, + enum DataFormat format, + Error **errp) +{ + CharDriverState *chr; + guchar *write_data; + int ret; + gsize write_count; + + chr = qemu_chr_find(device); + if (!chr) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (qemu_is_chr(chr, "memory")) { + error_setg(errp,"%s is not memory char device", device); + return; + } + + write_count = (gsize)size; + + if (has_format && (format == DATA_FORMAT_BASE64)) { + write_data = g_base64_decode(data, &write_count); + } else { + write_data = (uint8_t *)data; + 
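/* Standalone sketch of the CirMemCharDriver ring used by the memchar
 * commands below: prod and cons are free-running counters, indices are
 * reduced modulo the power-of-two size (which the open function
 * enforces, letting the compiler turn % into masking), and a full ring
 * silently drops the oldest bytes by advancing cons. */
#include <stdio.h>

#define RING_SIZE 8            /* must be a power of two */

static char   cbuf[RING_SIZE];
static size_t prod, cons;

static void ring_put(char c)
{
    cbuf[prod++ % RING_SIZE] = c;
    if (prod - cons > RING_SIZE) {
        cons = prod - RING_SIZE;       /* overwrite oldest data */
    }
}

static int ring_get(void)
{
    if (cons == prod) {
        return -1;                     /* empty */
    }
    return cbuf[cons++ % RING_SIZE];
}

int main(void)
{
    const char *msg = "hello, ring!";
    int c;

    for (; *msg; msg++) {
        ring_put(*msg);
    }
    while ((c = ring_get()) != -1) {
        putchar(c);                    /* only the last 8 bytes survive */
    }
    putchar('\n');
    return 0;
}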
} + + ret = cirmem_chr_write(chr, write_data, write_count); + + if (ret < 0) { + error_setg(errp, "Failed to write to device %s", device); + return; + } +} + +MemCharRead *qmp_memchar_read(const char *device, int64_t size, + bool has_format, enum DataFormat format, + Error **errp) +{ + CharDriverState *chr; + guchar *read_data; + MemCharRead *meminfo; + size_t count; + + chr = qemu_chr_find(device); + if (!chr) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return NULL; + } + + if (qemu_is_chr(chr, "memory")) { + error_setg(errp,"%s is not memory char device", device); + return NULL; + } + + if (size <= 0) { + error_setg(errp, "size must be greater than zero"); + return NULL; + } + + meminfo = g_malloc0(sizeof(MemCharRead)); + + count = qemu_chr_cirmem_count(chr); + if (count == 0) { + meminfo->data = g_strdup(""); + return meminfo; + } + + size = size > count ? count : size; + read_data = g_malloc0(size + 1); + + meminfo->count = cirmem_chr_read(chr, read_data, size); + + if (has_format && (format == DATA_FORMAT_BASE64)) { + meminfo->data = g_base64_encode(read_data, (size_t)meminfo->count); + } else { + meminfo->data = (char *)read_data; + } + + return meminfo; +} + QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) { char host[65], port[33], width[8], height[8]; @@ -2697,6 +2891,11 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) qemu_opt_set(opts, "path", filename); return opts; } + if (strstart(filename, "memory", &p)) { + qemu_opt_set(opts, "backend", "memory"); + qemu_opt_set(opts, "maxcapacity", p); + return opts; + } if (strstart(filename, "file:", &p)) { qemu_opt_set(opts, "backend", "file"); qemu_opt_set(opts, "path", p); @@ -2796,6 +2995,7 @@ static const struct { { .name = "udp", .open = qemu_chr_open_udp }, { .name = "msmouse", .open = qemu_chr_open_msmouse }, { .name = "vc", .open = text_console_init }, + { .name = "memory", .open = qemu_chr_open_cirmemchr }, #ifdef _WIN32 { .name = "file", .open = qemu_chr_open_win_file_out }, { .name = "pipe", .open = qemu_chr_open_win_pipe }, @@ -3055,6 +3255,9 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "debug", .type = QEMU_OPT_NUMBER, + },{ + .name = "maxcapacity", + .type = QEMU_OPT_NUMBER, }, { /* end of list */ } }, diff --git a/qemu-options.hx b/qemu-options.hx index 4e2b499..2d44137 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1736,6 +1736,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev msmouse,id=id[,mux=on|off]\n" "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n" " [,mux=on|off]\n" + "-chardev memory,id=id,maxcapacity=maxcapacity\n" "-chardev file,id=id,path=path[,mux=on|off]\n" "-chardev pipe,id=id,path=path[,mux=on|off]\n" #ifdef _WIN32 @@ -1777,6 +1778,7 @@ Backend is one of: @option{udp}, @option{msmouse}, @option{vc}, +@option{memory}, @option{file}, @option{pipe}, @option{console}, @@ -1885,6 +1887,14 @@ the console, in pixels. @option{cols} and @option{rows} specify that the console be sized to fit a text console with the given dimensions. +@item -chardev memory ,id=@var{id} ,maxcapacity=@var{maxcapacity} + +Create a circular buffer with fixed size indicated by optionally @option{maxcapacity} +which will be default 64K if it is not given. + +@option{maxcapacity} specifies the max capacity of the size of circular buffer +to create. Should be power of 2. + @item -chardev file ,id=@var{id} ,path=@var{path} Log all traffic received from the guest to a file. 
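Both memchar QMP handlers above lean on GLib for the base64 DataFormat:
g_base64_decode() sizes the incoming write and g_base64_encode() wraps
the read result.  A round-trip sketch (compile against glib-2.0):

#include <glib.h>
#include <stdio.h>

int main(void)
{
    gsize len;
    gchar  *enc = g_base64_encode((const guchar *)"abcdefgh", 8);
    guchar *dec = g_base64_decode(enc, &len);

    printf("encoded: %s\n", enc);
    printf("decoded: %.*s (%lu bytes)\n", (int)len, dec,
           (unsigned long)len);
    g_free(enc);
    g_free(dec);
    return 0;
}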
diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 0ad73f3..7a0202e 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -611,13 +611,14 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err) static void guest_fsfreeze_cleanup(void) { - int64_t ret; Error *err = NULL; if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) { - ret = qmp_guest_fsfreeze_thaw(&err); - if (ret < 0 || err) { - slog("failed to clean up frozen filesystems"); + qmp_guest_fsfreeze_thaw(&err); + if (err) { + slog("failed to clean up frozen filesystems: %s", + error_get_pretty(err)); + error_free(err); } } } @@ -934,9 +935,11 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) error_setg_errno(errp, errno, "failed to get MAC address of %s", ifa->ifa_name); + close(sock); goto error; } + close(sock); mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data; info->value->hardware_address = @@ -946,20 +949,19 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) (int) mac_addr[4], (int) mac_addr[5]); info->value->has_hardware_address = true; - close(sock); } if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { /* interface with IPv4 address */ - address_item = g_malloc0(sizeof(*address_item)); - address_item->value = g_malloc0(sizeof(*address_item->value)); p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) { error_setg_errno(errp, errno, "inet_ntop failed"); goto error; } + address_item = g_malloc0(sizeof(*address_item)); + address_item->value = g_malloc0(sizeof(*address_item->value)); address_item->value->ip_address = g_strdup(addr4); address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4; @@ -972,14 +974,14 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) } else if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) { /* interface with IPv6 address */ - address_item = g_malloc0(sizeof(*address_item)); - address_item->value = g_malloc0(sizeof(*address_item->value)); p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) { error_setg_errno(errp, errno, "inet_ntop failed"); goto error; } + address_item = g_malloc0(sizeof(*address_item)); + address_item->value = g_malloc0(sizeof(*address_item->value)); address_item->value->ip_address = g_strdup(addr6); address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6; diff --git a/qmp-commands.hx b/qmp-commands.hx index cbf1280..f58a841 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -466,6 +466,72 @@ Note: inject-nmi fails when the guest doesn't support injecting. EQMP { + .name = "memchar-write", + .args_type = "device:s,size:i,data:s,format:s?", + .mhandler.cmd_new = qmp_marshal_input_memchar_write, + }, + +SQMP +memchar-write +------------- + +Provide writing interface for CirMemCharDriver. Write data to memory +char device. + +Arguments: + +- "device": the name of the char device, must be unique (json-string) +- "size": the memory size, in bytes, should be power of 2 (json-int) +- "data": the source data write to memory (json-string) +- "format": the data format write to memory, default is + utf8. 
(json-string, optional) + - Possible values: "utf8", "base64" + +Example: + +-> { "execute": "memchar-write", + "arguments": { "device": foo, + "size": 8, + "data": "abcdefgh", + "format": "utf8" } } +<- { "return": {} } + +EQMP + + { + .name = "memchar-read", + .args_type = "device:s,size:i,format:s?", + .mhandler.cmd_new = qmp_marshal_input_memchar_read, + }, + +SQMP +memchar-read +------------- + +Provide read interface for CirMemCharDriver. Read from the char +device memory and return the data with size. + +Arguments: + +- "device": the name of the char device, must be unique (json-string) +- "size": the memory size wanted to read in bytes (refer to unencoded + size of the raw data), would adjust to the init size of the + memchar if the requested size is larger than it. (json-int) +- "format": the data format write to memchardev, default is + utf8. (json-string, optional) + - Possible values: "utf8", "base64" + +Example: + +-> { "execute": "memchar-read", + "arguments": { "device": foo, + "size": 1000, + "format": "utf8" } } +<- { "return": { "data": "data string...", "count": 1000 } } + +EQMP + + { .name = "xen-save-devices-state", .args_type = "filename:F", .mhandler.cmd_new = qmp_marshal_input_xen_save_devices_state, @@ -938,7 +1004,8 @@ EQMP { .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," - "on-source-error:s?,on-target-error:s?", + "on-source-error:s?,on-target-error:s?," + "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -962,6 +1029,9 @@ Arguments: file/device (NewImageMode, optional, default 'absolute-paths') - "speed": maximum speed of the streaming job, in bytes per second (json-int) +- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional) +- "buf_size": maximum amount of data in flight from source to target, in bytes + (json-int, default 10M) - "sync": what parts of the disk image should be copied to the destination; possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O @@ -971,6 +1041,10 @@ Arguments: - "on-target-error": the action to take on an error on the target (BlockdevOnError, default 'report') +The default value of the granularity is the image cluster size clamped +between 4096 and 65536, if the image format defines one. If the format +does not define a cluster size, the default value of the granularity +is 65536. Example: @@ -2549,13 +2623,6 @@ Make an asynchronous request for balloon info. 
When the request completes a json-object will be returned containing the following data: - "actual": current balloon value in bytes (json-int) -- "mem_swapped_in": Amount of memory swapped in bytes (json-int, optional) -- "mem_swapped_out": Amount of memory swapped out in bytes (json-int, optional) -- "major_page_faults": Number of major faults (json-int, optional) -- "minor_page_faults": Number of minor faults (json-int, optional) -- "free_mem": Total amount of free and unused memory in - bytes (json-int, optional) -- "total_mem": Total amount of available memory in bytes (json-int, optional) Example: @@ -2563,12 +2630,6 @@ Example: <- { "return":{ "actual":1073741824, - "mem_swapped_in":0, - "mem_swapped_out":0, - "major_page_faults":142, - "minor_page_faults":239245, - "free_mem":1014185984, - "total_mem":1044668416 } } @@ -34,11 +34,24 @@ static void cpu_common_reset(CPUState *cpu) { } +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) +{ + CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); + + return cc->class_by_name(cpu_model); +} + +static ObjectClass *cpu_common_class_by_name(const char *cpu_model) +{ + return NULL; +} + static void cpu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); CPUClass *k = CPU_CLASS(klass); + k->class_by_name = cpu_common_class_by_name; k->reset = cpu_common_reset; dc->no_user = 1; } diff --git a/qom/object.c b/qom/object.c index 03e6f24..e200282 100644 --- a/qom/object.c +++ b/qom/object.c @@ -501,6 +501,11 @@ ObjectClass *object_get_class(Object *obj) return obj->class; } +bool object_class_is_abstract(ObjectClass *klass) +{ + return klass->type->abstract; +} + const char *object_class_get_name(ObjectClass *klass) { return klass->type->name; diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index cbe6440..0b23f77 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -147,6 +147,7 @@ controls = [ 5: 'Enable VPID', 6: 'WBINVD exiting', 7: 'Unrestricted guest', + 9: 'Virtual interrupt delivery', 10: 'PAUSE-loop exiting', 11: 'RDRAND exiting', 12: 'Enable INVPCID', diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c index 40e9809..0ad69f0 100644 --- a/target-alpha/cpu.c +++ b/target-alpha/cpu.c @@ -96,14 +96,15 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model) } oc = object_class_by_name(cpu_model); - if (oc != NULL) { + if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL && + !object_class_is_abstract(oc)) { return oc; } for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) { if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) { oc = object_class_by_name(alpha_cpu_aliases[i].typename); - assert(oc != NULL); + assert(oc != NULL && !object_class_is_abstract(oc)); return oc; } } @@ -111,6 +112,9 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model) typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model); oc = object_class_by_name(typename); g_free(typename); + if (oc != NULL && object_class_is_abstract(oc)) { + oc = NULL; + } return oc; } @@ -244,6 +248,13 @@ static void alpha_cpu_initfn(Object *obj) env->fen = 1; } +static void alpha_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + + cc->class_by_name = alpha_cpu_class_by_name; +} + static const TypeInfo alpha_cpu_type_info = { .name = TYPE_ALPHA_CPU, .parent = TYPE_CPU, @@ -251,6 +262,7 @@ static const TypeInfo alpha_cpu_type_info = { .instance_init = alpha_cpu_initfn, .abstract = true, .class_size = sizeof(AlphaCPUClass), + .class_init = 
alpha_cpu_class_init, }; static void alpha_cpu_register_types(void) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 07588a1..d1a4c82 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -201,6 +201,22 @@ void arm_cpu_realize(ARMCPU *cpu) /* CPU models */ +static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (!cpu_model) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) || + object_class_is_abstract(oc)) { + return NULL; + } + return oc; +} + static void arm926_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -766,6 +782,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) acc->parent_reset = cc->reset; cc->reset = arm_cpu_reset; + + cc->class_by_name = arm_cpu_class_by_name; } static void cpu_register(const ARMCPUInfo *info) diff --git a/target-arm/helper.c b/target-arm/helper.c index 37c34a1..7a10fdd 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1262,12 +1262,14 @@ ARMCPU *cpu_arm_init(const char *cpu_model) { ARMCPU *cpu; CPUARMState *env; + ObjectClass *oc; static int inited = 0; - if (!object_class_by_name(cpu_model)) { + oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model); + if (!oc) { return NULL; } - cpu = ARM_CPU(object_new(cpu_model)); + cpu = ARM_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; env->cpu_model_str = cpu_model; arm_cpu_realize(cpu); diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 376d4c8..5c108e1 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -23,6 +23,8 @@ #include "cpu.h" #include "sysemu/kvm.h" +#include "sysemu/cpus.h" +#include "topology.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -45,6 +47,18 @@ #include "hw/apic_internal.h" #endif +static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) +{ + int i; + for (i = 0; i < 4; i++) { + dst[i] = vendor1 >> (8 * i); + dst[i + 4] = vendor2 >> (8 * i); + dst[i + 8] = vendor3 >> (8 * i); + } + dst[CPUID_VENDOR_SZ] = '\0'; +} + /* feature flags taken from "Intel Processor Identification and the CPUID * Instruction" and AMD's "CPUID Specification". In cases of disagreement * between feature naming conventions, aliases may be added. 
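The x86_cpu_vendor_words2str() helper added above packs the three CPUID vendor register words (returned in EBX, EDX and ECX by leaf 0) into the familiar 12-character vendor string, replacing the open-coded loops this patch deletes elsewhere. Below is a minimal standalone sketch of the conversion; it is an illustration, not code from the patch: vendor_words2str() is a local stand-in for the helper, and the hex constants mirror CPUID_VENDOR_INTEL_1/2/3 from target-i386/cpu.h.

#include <stdint.h>
#include <stdio.h>

#define CPUID_VENDOR_SZ 12

/* Pack three little-endian CPUID register words into a vendor string,
 * using the same loop as x86_cpu_vendor_words2str(). */
static void vendor_words2str(char *dst, uint32_t w1, uint32_t w2, uint32_t w3)
{
    int i;
    for (i = 0; i < 4; i++) {
        dst[i] = w1 >> (8 * i);
        dst[i + 4] = w2 >> (8 * i);
        dst[i + 8] = w3 >> (8 * i);
    }
    dst[CPUID_VENDOR_SZ] = '\0';
}

int main(void)
{
    char buf[CPUID_VENDOR_SZ + 1];

    /* 0x756e6547/0x49656e69/0x6c65746e are "Genu"/"ineI"/"ntel" in
     * EBX/EDX/ECX order (the CPUID_VENDOR_INTEL_1/2/3 values). */
    vendor_words2str(buf, 0x756e6547, 0x49656e69, 0x6c65746e);
    printf("%s\n", buf); /* prints "GenuineIntel" */
    return 0;
}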
@@ -206,22 +220,17 @@ typedef struct model_features_t { int check_cpuid = 0; int enforce_cpuid = 0; -#if defined(CONFIG_KVM) static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_STEAL_TIME) | + (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); -static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI); -#else -static uint32_t kvm_default_features = 0; -static const uint32_t kvm_pv_eoi_features = 0; -#endif -void enable_kvm_pv_eoi(void) +void disable_kvm_pv_eoi(void) { - kvm_default_features |= kvm_pv_eoi_features; + kvm_default_features &= ~(1UL << KVM_FEATURE_PV_EOI); } void host_cpuid(uint32_t function, uint32_t count, @@ -338,19 +347,17 @@ static void add_flagname_to_bitmaps(const char *flagname, } typedef struct x86_def_t { - struct x86_def_t *next; const char *name; uint32_t level; - uint32_t vendor1, vendor2, vendor3; + /* vendor is zero-terminated, 12 character ASCII string */ + char vendor[CPUID_VENDOR_SZ + 1]; int family; int model; int stepping; - int tsc_khz; uint32_t features, ext_features, ext2_features, ext3_features; uint32_t kvm_features, svm_features; uint32_t xlevel; char model_id[48]; - int vendor_override; /* Store the results of Centaur's CPUID instructions */ uint32_t ext4_features; uint32_t xlevel2; @@ -396,19 +403,13 @@ typedef struct x86_def_t { #define TCG_SVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP) -/* maintains list of cpu model definitions - */ -static x86_def_t *x86_defs = {NULL}; - -/* built-in cpu model definitions (deprecated) +/* built-in CPU model definitions */ static x86_def_t builtin_x86_defs[] = { { .name = "qemu64", .level = 4, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 6, .model = 2, .stepping = 3, @@ -425,9 +426,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "phenom", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 16, .model = 2, .stepping = 3, @@ -453,9 +452,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "core2duo", .level = 10, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 15, .stepping = 11, @@ -474,9 +471,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "kvm64", .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 15, .model = 6, .stepping = 1, @@ -500,9 +495,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "qemu32", .level = 4, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 3, .stepping = 3, @@ -513,9 +506,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "kvm32", .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 15, .model = 6, .stepping = 1, @@ -530,9 +521,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "coreduo", .level = 10, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, 
.family = 6, .model = 14, .stepping = 8, @@ -548,9 +537,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "486", .level = 1, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 4, .model = 0, .stepping = 0, @@ -560,9 +547,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium", .level = 1, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 5, .model = 4, .stepping = 3, @@ -572,9 +557,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium2", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 5, .stepping = 2, @@ -584,9 +567,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium3", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 7, .stepping = 3, @@ -596,9 +577,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "athlon", .level = 2, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 6, .model = 2, .stepping = 3, @@ -612,9 +591,7 @@ static x86_def_t builtin_x86_defs[] = { .name = "n270", /* original is on level 10 */ .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 28, .stepping = 2, @@ -633,9 +610,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Conroe", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -653,9 +628,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Penryn", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -674,9 +647,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Nehalem", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -695,9 +666,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Westmere", .level = 11, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 44, .stepping = 1, @@ -717,9 +686,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "SandyBridge", .level = 0xd, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 42, .stepping = 1, @@ -742,9 +709,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Haswell", .level = 0xd, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 60, .stepping = 1, @@ -772,9 +737,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G1", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 
6, .stepping = 1, @@ -796,9 +759,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G2", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 6, .stepping = 1, @@ -822,9 +783,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G3", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 6, .stepping = 1, @@ -850,9 +809,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G4", .level = 0xd, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 21, .model = 1, .stepping = 2, @@ -882,9 +839,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G5", .level = 0xd, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 21, .model = 2, .stepping = 0, @@ -945,9 +900,7 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def) x86_cpu_def->name = "host"; host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->vendor1 = ebx; - x86_cpu_def->vendor2 = edx; - x86_cpu_def->vendor3 = ecx; + x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx); host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); x86_cpu_def->family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); @@ -972,12 +925,9 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def) kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX); cpu_x86_fill_model_id(x86_cpu_def->model_id); - x86_cpu_def->vendor_override = 0; /* Call Centaur's CPUID instruction. */ - if (x86_cpu_def->vendor1 == CPUID_VENDOR_VIA_1 && - x86_cpu_def->vendor2 == CPUID_VENDOR_VIA_2 && - x86_cpu_def->vendor3 == CPUID_VENDOR_VIA_3) { + if (!strcmp(x86_cpu_def->vendor, CPUID_VENDOR_VIA)) { host_cpuid(0xC0000000, 0, &eax, &ebx, &ecx, &edx); eax = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); if (eax >= 0xC0000001) { @@ -1213,15 +1163,10 @@ static char *x86_cpuid_get_vendor(Object *obj, Error **errp) X86CPU *cpu = X86_CPU(obj); CPUX86State *env = &cpu->env; char *value; - int i; value = (char *)g_malloc(CPUID_VENDOR_SZ + 1); - for (i = 0; i < 4; i++) { - value[i ] = env->cpuid_vendor1 >> (8 * i); - value[i + 4] = env->cpuid_vendor2 >> (8 * i); - value[i + 8] = env->cpuid_vendor3 >> (8 * i); - } - value[CPUID_VENDOR_SZ] = '\0'; + x86_cpu_vendor_words2str(value, env->cpuid_vendor1, env->cpuid_vendor2, + env->cpuid_vendor3); return value; } @@ -1246,7 +1191,6 @@ static void x86_cpuid_set_vendor(Object *obj, const char *value, env->cpuid_vendor2 |= ((uint8_t)value[i + 4]) << (8 * i); env->cpuid_vendor3 |= ((uint8_t)value[i + 8]) << (8 * i); } - env->cpuid_vendor_override = 1; } static char *x86_cpuid_get_model_id(Object *obj, Error **errp) @@ -1320,34 +1264,50 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, void *opaque, static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name) { x86_def_t *def; + int i; - for (def = x86_defs; def; def = def->next) { - if (name && !strcmp(name, def->name)) { - break; - } + if (name == NULL) { + return -1; } - if (kvm_enabled() && name && strcmp(name, "host") == 0) { + if (kvm_enabled() && strcmp(name, "host") == 0) { kvm_cpu_fill_host(x86_cpu_def); - } else if (!def) { - return -1; - } else { - memcpy(x86_cpu_def, def, sizeof(*def)); + return 0; } - return 0; + for (i = 0; i < 
ARRAY_SIZE(builtin_x86_defs); i++) { + def = &builtin_x86_defs[i]; + if (strcmp(name, def->name) == 0) { + memcpy(x86_cpu_def, def, sizeof(*def)); + /* sysenter isn't supported in compatibility mode on AMD, + * syscall isn't supported in compatibility mode on Intel. + * Normally we advertise the actual CPU vendor, but you can + * override this using the 'vendor' property if you want to use + * KVM's sysenter/syscall emulation in compatibility mode and + * when doing cross vendor migration + */ + if (kvm_enabled()) { + uint32_t ebx = 0, ecx = 0, edx = 0; + host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx); + } + return 0; + } + } + + return -1; } /* Parse "+feature,-feature,feature=foo" CPU feature string */ -static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) +static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp) { - unsigned int i; char *featurestr; /* Single 'key=value" string being parsed */ /* Features to be added */ FeatureWordArray plus_features = { 0 }; /* Features to be removed */ FeatureWordArray minus_features = { 0 }; uint32_t numvalue; + CPUX86State *env = &cpu->env; featurestr = features ? strtok(features, ",") : NULL; @@ -1360,87 +1320,57 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) } else if ((val = strchr(featurestr, '='))) { *val = 0; val++; if (!strcmp(featurestr, "family")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xff + 0xf) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->family = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "model")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xff) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->model = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "stepping")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xf) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->stepping = numvalue ; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "level")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->level = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "xlevel")) { char *err; + char num[32]; + numvalue = strtoul(val, &err, 0); if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } if (numvalue < 0x80000000) { + fprintf(stderr, "xlevel value shall always be >= 0x80000000" + ", fixup will be removed in future versions\n"); numvalue += 0x80000000; } - x86_cpu_def->xlevel = numvalue; + snprintf(num, sizeof(num), "%" PRIu32, numvalue); + object_property_parse(OBJECT(cpu), num, featurestr, errp); } else if (!strcmp(featurestr, "vendor")) { - if (strlen(val) != 12) { - fprintf(stderr, "vendor string must be 12 chars long\n"); - goto error; - } - x86_cpu_def->vendor1 = 0; - x86_cpu_def->vendor2 = 0; - x86_cpu_def->vendor3 = 0; - for(i = 0; i < 4; i++) { - x86_cpu_def->vendor1 |= ((uint8_t)val[i ]) << (8 * i); - x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4]) << (8 * i); - 
x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8]) << (8 * i); - } - x86_cpu_def->vendor_override = 1; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "model_id")) { - pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id), - val); + object_property_parse(OBJECT(cpu), val, "model-id", errp); } else if (!strcmp(featurestr, "tsc_freq")) { int64_t tsc_freq; char *err; + char num[32]; tsc_freq = strtosz_suffix_unit(val, &err, STRTOSZ_DEFSUFFIX_B, 1000); if (tsc_freq < 0 || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } - x86_cpu_def->tsc_khz = tsc_freq / 1000; + snprintf(num, sizeof(num), "%" PRId64, tsc_freq); + object_property_parse(OBJECT(cpu), num, "tsc-frequency", errp); } else if (!strcmp(featurestr, "hv_spinlocks")) { char *err; numvalue = strtoul(val, &err, 0); if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } hyperv_set_spinlock_retries(numvalue); } else { - fprintf(stderr, "unrecognized feature %s\n", featurestr); - goto error; + error_setg(errp, "unrecognized feature %s\n", featurestr); + goto out; } } else if (!strcmp(featurestr, "check")) { check_cpuid = 1; @@ -1451,31 +1381,34 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) } else if (!strcmp(featurestr, "hv_vapic")) { hyperv_enable_vapic_recommended(true); } else { - fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr); - goto error; + error_setg(errp, "feature string `%s' not in format (+feature|" + "-feature|feature=xyz)\n", featurestr); + goto out; + } + if (error_is_set(errp)) { + goto out; } featurestr = strtok(NULL, ","); } - x86_cpu_def->features |= plus_features[FEAT_1_EDX]; - x86_cpu_def->ext_features |= plus_features[FEAT_1_ECX]; - x86_cpu_def->ext2_features |= plus_features[FEAT_8000_0001_EDX]; - x86_cpu_def->ext3_features |= plus_features[FEAT_8000_0001_ECX]; - x86_cpu_def->ext4_features |= plus_features[FEAT_C000_0001_EDX]; - x86_cpu_def->kvm_features |= plus_features[FEAT_KVM]; - x86_cpu_def->svm_features |= plus_features[FEAT_SVM]; - x86_cpu_def->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX]; - x86_cpu_def->features &= ~minus_features[FEAT_1_EDX]; - x86_cpu_def->ext_features &= ~minus_features[FEAT_1_ECX]; - x86_cpu_def->ext2_features &= ~minus_features[FEAT_8000_0001_EDX]; - x86_cpu_def->ext3_features &= ~minus_features[FEAT_8000_0001_ECX]; - x86_cpu_def->ext4_features &= ~minus_features[FEAT_C000_0001_EDX]; - x86_cpu_def->kvm_features &= ~minus_features[FEAT_KVM]; - x86_cpu_def->svm_features &= ~minus_features[FEAT_SVM]; - x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX]; - return 0; + env->cpuid_features |= plus_features[FEAT_1_EDX]; + env->cpuid_ext_features |= plus_features[FEAT_1_ECX]; + env->cpuid_ext2_features |= plus_features[FEAT_8000_0001_EDX]; + env->cpuid_ext3_features |= plus_features[FEAT_8000_0001_ECX]; + env->cpuid_ext4_features |= plus_features[FEAT_C000_0001_EDX]; + env->cpuid_kvm_features |= plus_features[FEAT_KVM]; + env->cpuid_svm_features |= plus_features[FEAT_SVM]; + env->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX]; + env->cpuid_features &= ~minus_features[FEAT_1_EDX]; + env->cpuid_ext_features &= ~minus_features[FEAT_1_ECX]; + env->cpuid_ext2_features &= ~minus_features[FEAT_8000_0001_EDX]; + env->cpuid_ext3_features &= 
~minus_features[FEAT_8000_0001_ECX]; + env->cpuid_ext4_features &= ~minus_features[FEAT_C000_0001_EDX]; + env->cpuid_kvm_features &= ~minus_features[FEAT_KVM]; + env->cpuid_svm_features &= ~minus_features[FEAT_SVM]; + env->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX]; -error: - return -1; +out: + return; } /* generate a composite string into buf of all cpuid names in featureset @@ -1513,8 +1446,10 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) { x86_def_t *def; char buf[256]; + int i; - for (def = x86_defs; def; def = def->next) { + for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { + def = &builtin_x86_defs[i]; snprintf(buf, sizeof(buf), "%s", def->name); (*cpu_fprintf)(f, "x86 %16s %-48s\n", buf, def->model_id); } @@ -1536,11 +1471,13 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) { CpuDefinitionInfoList *cpu_list = NULL; x86_def_t *def; + int i; - for (def = x86_defs; def; def = def->next) { + for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { CpuDefinitionInfoList *entry; CpuDefinitionInfo *info; + def = &builtin_x86_defs[i]; info = g_malloc0(sizeof(*info)); info->name = g_strdup(def->name); @@ -1602,18 +1539,12 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model) goto out; } - def->kvm_features |= kvm_default_features; + if (kvm_enabled()) { + def->kvm_features |= kvm_default_features; + } def->ext_features |= CPUID_EXT_HYPERVISOR; - if (cpu_x86_parse_featurestr(def, features) < 0) { - error_setg(&error, "Invalid cpu_model string format: %s", cpu_model); - goto out; - } - assert(def->vendor1); - env->cpuid_vendor1 = def->vendor1; - env->cpuid_vendor2 = def->vendor2; - env->cpuid_vendor3 = def->vendor3; - env->cpuid_vendor_override = def->vendor_override; + object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error); object_property_set_int(OBJECT(cpu), def->level, "level", &error); object_property_set_int(OBJECT(cpu), def->family, "family", &error); object_property_set_int(OBJECT(cpu), def->model, "model", &error); @@ -1628,11 +1559,13 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model) env->cpuid_ext4_features = def->ext4_features; env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features; env->cpuid_xlevel2 = def->xlevel2; - object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000, - "tsc-frequency", &error); object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error); + if (error) { + goto out; + } + cpu_x86_parse_featurestr(cpu, features, &error); out: g_strfreev(model_pieces); if (error) { @@ -1661,7 +1594,6 @@ void x86_cpudef_setup(void) for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) { x86_def_t *def = &builtin_x86_defs[i]; - def->next = x86_defs; /* Look for specific "cpudef" models that */ /* have the QEMU version in .model_id */ @@ -1674,8 +1606,6 @@ void x86_cpudef_setup(void) break; } } - - x86_defs = def; } } @@ -1685,16 +1615,6 @@ static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx, *ebx = env->cpuid_vendor1; *edx = env->cpuid_vendor2; *ecx = env->cpuid_vendor3; - - /* sysenter isn't supported on compatibility mode on AMD, syscall - * isn't supported in compatibility mode on Intel. - * Normally we advertise the actual cpu vendor, but you can override - * this if you want to use KVM's sysenter/syscall emulation - * in compatibility mode and when doing cross vendor migration - */ - if (kvm_enabled() && ! 
env->cpuid_vendor_override) { - host_cpuid(0, 0, NULL, ebx, ecx, edx); - } } void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, @@ -2197,6 +2117,39 @@ void x86_cpu_realize(Object *obj, Error **errp) cpu_reset(CPU(cpu)); } +/* Enables contiguous-apic-ID mode, for compatibility */ +static bool compat_apic_id_mode; + +void enable_compat_apic_id_mode(void) +{ + compat_apic_id_mode = true; +} + +/* Calculates initial APIC ID for a specific CPU index + * + * Currently we need to be able to calculate the APIC ID from the CPU index + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of + * all CPUs up to max_cpus. + */ +uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +{ + uint32_t correct_id; + static bool warned; + + correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); + if (compat_apic_id_mode) { + if (cpu_index != correct_id && !warned) { + error_report("APIC IDs set in compatibility mode, " + "CPU topology won't match the configuration"); + warned = true; + } + return cpu_index; + } else { + return correct_id; + } +} + static void x86_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); @@ -2231,7 +2184,7 @@ static void x86_cpu_initfn(Object *obj) x86_cpuid_get_tsc_freq, x86_cpuid_set_tsc_freq, NULL, NULL, NULL); - env->cpuid_apic_id = cs->cpu_index; + env->cpuid_apic_id = x86_cpu_apic_id_from_index(cs->cpu_index); /* init various static tables used in TCG mode */ if (tcg_enabled() && !inited) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 4e091cd..62508dc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -537,14 +537,14 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */ +#define CPUID_VENDOR_INTEL "GenuineIntel" #define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ #define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ #define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ +#define CPUID_VENDOR_AMD "AuthenticAMD" -#define CPUID_VENDOR_VIA_1 0x746e6543 /* "Cent" */ -#define CPUID_VENDOR_VIA_2 0x48727561 /* "aurH" */ -#define CPUID_VENDOR_VIA_3 0x736c7561 /* "auls" */ +#define CPUID_VENDOR_VIA "CentaurHauls" #define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */ @@ -835,7 +835,6 @@ typedef struct CPUX86State { uint32_t cpuid_ext2_features; uint32_t cpuid_ext3_features; uint32_t cpuid_apic_id; - int cpuid_vendor_override; /* Store the results of Centaur's CPUID instructions */ uint32_t cpuid_xlevel2; uint32_t cpuid_ext4_features; @@ -1250,9 +1249,12 @@ void do_smm_enter(CPUX86State *env1); void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); -void enable_kvm_pv_eoi(void); +void disable_kvm_pv_eoi(void); /* Return name of 32-bit register, from a R_* constant */ const char *get_register_name_32(unsigned int reg); +uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index); +void enable_compat_apic_id_mode(void); + #endif /* CPU_I386_H */ diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3acff40..9ebf181 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -411,11 +411,19 @@ static void cpu_update_state(void *opaque, int running, RunState state) } } +unsigned long kvm_arch_vcpu_id(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + return cpu->env.cpuid_apic_id; +} + +#define 
KVM_MAX_CPUID_ENTRIES 100 + int kvm_arch_init_vcpu(CPUState *cs) { struct { struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[100]; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; } QEMU_PACKED cpuid_data; X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; @@ -502,6 +510,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); for (i = 0; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported level value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; switch (i) { @@ -516,6 +528,11 @@ int kvm_arch_init_vcpu(CPUState *cs) times = c->eax & 0xff; for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:2):eax & 0xf = 0x%x\n", times); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -544,6 +561,11 @@ int kvm_arch_init_vcpu(CPUState *cs) if (i == 0xd && c->eax == 0) { continue; } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; } break; @@ -557,6 +579,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); for (i = 0x80000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; @@ -569,6 +595,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); for (i = 0xC0000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; diff --git a/target-i386/topology.h b/target-i386/topology.h new file mode 100644 index 0000000..24ed525 --- /dev/null +++ b/target-i386/topology.h @@ -0,0 +1,136 @@ +/* + * x86 CPU topology data structures and functions + * + * Copyright (c) 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TARGET_I386_TOPOLOGY_H +#define TARGET_I386_TOPOLOGY_H + +/* This file implements the APIC-ID-based CPU topology enumeration logic, + * documented in the following document: + * Intel® 64 Architecture Processor Topology Enumeration + * http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/ + * + * This code should be compatible with AMD's "Extended Method" described at: + * AMD CPUID Specification (Publication #25481) + * Section 3: Multiple Core Calculation + * as long as: + * nr_threads is set to 1; + * OFFSET_IDX is assumed to be 0; + * CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width(). + */ + +#include <stdint.h> +#include <string.h> + +#include "qemu/bitops.h" + +/* APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support + */ +typedef uint32_t apic_id_t; + +/* Return the bit width needed for 'count' IDs + */ +static unsigned apicid_bitwidth_for_count(unsigned count) +{ + g_assert(count >= 1); + if (count == 1) { + return 0; + } + return bitops_flsl(count - 1) + 1; +} + +/* Bit width of the SMT_ID (thread ID) field on the APIC ID + */ +static inline unsigned apicid_smt_width(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_bitwidth_for_count(nr_threads); +} + +/* Bit width of the Core_ID field + */ +static inline unsigned apicid_core_width(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_bitwidth_for_count(nr_cores); +} + +/* Bit offset of the Core_ID field + */ +static inline unsigned apicid_core_offset(unsigned nr_cores, + unsigned nr_threads) +{ + return apicid_smt_width(nr_cores, nr_threads); +} + +/* Bit offset of the Pkg_ID (socket ID) field + */ +static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_core_offset(nr_cores, nr_threads) + + apicid_core_width(nr_cores, nr_threads); +} + +/* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID + * + * The caller must make sure core_id < nr_cores and smt_id < nr_threads. + */ +static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores, + unsigned nr_threads, + unsigned pkg_id, + unsigned core_id, + unsigned smt_id) +{ + return (pkg_id << apicid_pkg_offset(nr_cores, nr_threads)) | + (core_id << apicid_core_offset(nr_cores, nr_threads)) | + smt_id; +} + +/* Calculate thread/core/package IDs for a specific topology, + * based on (contiguous) CPU index + */ +static inline void x86_topo_ids_from_idx(unsigned nr_cores, + unsigned nr_threads, + unsigned cpu_index, + unsigned *pkg_id, + unsigned *core_id, + unsigned *smt_id) +{ + unsigned core_index = cpu_index / nr_threads; + *smt_id = cpu_index % nr_threads; + *core_id = core_index % nr_cores; + *pkg_id = core_index / nr_cores; +} + +/* Make APIC ID for the CPU 'cpu_index' + * + * 'cpu_index' is a sequential, contiguous ID for the CPU.
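+ *
+ * Worked example (illustrative, not part of the original file): with
+ * nr_cores=2 and nr_threads=2, cpu_index 5 gives smt_id=1, core_id=0
+ * and pkg_id=1, so the APIC ID is (1 << 2) | (0 << 1) | 1 = 5; with
+ * nr_cores=3 and nr_threads=1, apicid_core_width() is 2, so cpu_index 3
+ * gives pkg_id=1 and an APIC ID of 1 << 2 = 4, i.e. the IDs are no
+ * longer contiguous, which is why x86_cpu_apic_id_from_index() warns
+ * in compatibility mode.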
+ */ +static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores, + unsigned nr_threads, + unsigned cpu_index) +{ + unsigned pkg_id, core_id, smt_id; + x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index, + &pkg_id, &core_id, &smt_id); + return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id); +} + +#endif /* TARGET_I386_TOPOLOGY_H */ diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c index ce89674..5c78031 100644 --- a/target-m68k/cpu.c +++ b/target-m68k/cpu.c @@ -55,6 +55,22 @@ static void m68k_cpu_reset(CPUState *s) /* CPU models */ +static ObjectClass *m68k_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL || + object_class_is_abstract(oc))) { + return NULL; + } + return oc; +} + static void m5206_cpu_initfn(Object *obj) { M68kCPU *cpu = M68K_CPU(obj); @@ -134,6 +150,8 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) mcc->parent_reset = cc->reset; cc->reset = m68k_cpu_reset; + + cc->class_by_name = m68k_cpu_class_by_name; } static void register_cpu_type(const M68kCPUInfo *info) @@ -144,7 +162,7 @@ static void register_cpu_type(const M68kCPUInfo *info) .instance_init = info->instance_init, }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo m68k_cpu_type_info = { diff --git a/target-m68k/helper.c b/target-m68k/helper.c index 097fc78..f66e12b 100644 --- a/target-m68k/helper.c +++ b/target-m68k/helper.c @@ -97,12 +97,14 @@ CPUM68KState *cpu_m68k_init(const char *cpu_model) { M68kCPU *cpu; CPUM68KState *env; + ObjectClass *oc; static int inited; - if (object_class_by_name(cpu_model) == NULL) { + oc = cpu_class_by_name(TYPE_M68K_CPU, cpu_model); + if (oc == NULL) { return NULL; } - cpu = M68K_CPU(object_new(cpu_model)); + cpu = M68K_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; if (!inited) { diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c index 56544d8..54876d9 100644 --- a/target-openrisc/cpu.c +++ b/target-openrisc/cpu.c @@ -88,6 +88,23 @@ static void openrisc_cpu_initfn(Object *obj) } /* CPU models */ + +static ObjectClass *openrisc_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU) || + object_class_is_abstract(oc))) { + return NULL; + } + return oc; +} + static void or1200_initfn(Object *obj) { OpenRISCCPU *cpu = OPENRISC_CPU(obj); @@ -120,6 +137,8 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data) occ->parent_reset = cc->reset; cc->reset = openrisc_cpu_reset; + + cc->class_by_name = openrisc_cpu_class_by_name; } static void cpu_register(const OpenRISCCPUInfo *info) @@ -132,7 +151,7 @@ static void cpu_register(const OpenRISCCPUInfo *info) .class_size = sizeof(OpenRISCCPUClass), }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo openrisc_cpu_type_info = { @@ -158,11 +177,13 @@ static void openrisc_cpu_register_types(void) OpenRISCCPU *cpu_openrisc_init(const char *cpu_model) { OpenRISCCPU *cpu; + ObjectClass *oc; - if (!object_class_by_name(cpu_model)) { + oc = openrisc_cpu_class_by_name(cpu_model); + if (oc == NULL) { return NULL; } - cpu = OPENRISC_CPU(object_new(cpu_model)); + cpu = OPENRISC_CPU(object_new(object_class_get_name(oc))); cpu->env.cpu_model_str = cpu_model; 
openrisc_cpu_realize(OBJECT(cpu), NULL); @@ -170,11 +191,6 @@ OpenRISCCPU *cpu_openrisc_init(const char *cpu_model) return cpu; } -typedef struct OpenRISCCPUList { - fprintf_function cpu_fprintf; - FILE *file; -} OpenRISCCPUList; - /* Sort alphabetically by type name, except for "any". */ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b) { @@ -196,7 +212,7 @@ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b) static void openrisc_cpu_list_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; - OpenRISCCPUList *s = user_data; + CPUListState *s = user_data; (*s->cpu_fprintf)(s->file, " %s\n", object_class_get_name(oc)); @@ -204,7 +220,7 @@ static void openrisc_cpu_list_entry(gpointer data, gpointer user_data) void cpu_openrisc_list(FILE *f, fprintf_function cpu_fprintf) { - OpenRISCCPUList s = { + CPUListState s = { .file = f, .cpu_fprintf = cpu_fprintf, }; diff --git a/target-openrisc/exception_helper.c b/target-openrisc/exception_helper.c index dab4148..0c53b77 100644 --- a/target-openrisc/exception_helper.c +++ b/target-openrisc/exception_helper.c @@ -23,7 +23,7 @@ void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp) { - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); raise_exception(cpu, excp); } diff --git a/target-openrisc/fpu_helper.c b/target-openrisc/fpu_helper.c index b184d5e..4615a36 100644 --- a/target-openrisc/fpu_helper.c +++ b/target-openrisc/fpu_helper.c @@ -68,7 +68,7 @@ static inline void update_fpcsr(OpenRISCCPU *cpu) uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val) { uint64_t itofd; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); itofd = int32_to_float64(val, &cpu->env.fp_status); @@ -80,7 +80,7 @@ uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val) uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val) { uint32_t itofs; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); itofs = int32_to_float32(val, &cpu->env.fp_status); @@ -92,7 +92,7 @@ uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val) uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val) { uint64_t ftoid; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); ftoid = float32_to_int64(val, &cpu->env.fp_status); @@ -104,7 +104,7 @@ uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val) uint32_t HELPER(ftois)(CPUOpenRISCState *env, uint32_t val) { uint32_t ftois; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); ftois = float32_to_int32(val, &cpu->env.fp_status); @@ -120,7 +120,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ uint64_t result; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ result = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -131,7 +131,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ uint32_t result; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = 
openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ result = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -152,7 +152,7 @@ uint64_t helper_float_ ## name1 ## name2 ## _d(CPUOpenRISCState *env, \ { \ uint64_t result, temp, hi, lo; \ uint32_t val1, val2; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ hi = env->fpmaddhi; \ lo = env->fpmaddlo; \ set_float_exception_flags(0, &cpu->env.fp_status); \ @@ -174,7 +174,7 @@ uint32_t helper_float_ ## name1 ## name2 ## _s(CPUOpenRISCState *env, \ { \ uint64_t result, temp, hi, lo; \ uint32_t val1, val2; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ hi = cpu->env.fpmaddhi; \ lo = cpu->env.fpmaddlo; \ set_float_exception_flags(0, &cpu->env.fp_status); \ @@ -198,7 +198,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -209,7 +209,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1)\ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -227,7 +227,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -238,7 +238,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -253,7 +253,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_le(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -264,7 +264,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_le(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -278,7 +278,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_lt(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -289,7 +289,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ 
int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_lt(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ diff --git a/target-openrisc/int_helper.c b/target-openrisc/int_helper.c index 20f9837..16cb5ab 100644 --- a/target-openrisc/int_helper.c +++ b/target-openrisc/int_helper.c @@ -48,7 +48,7 @@ uint32_t HELPER(mul32)(CPUOpenRISCState *env, uint64_t result; uint32_t high, cy; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); result = (uint64_t)ra * rb; /* regisiers in or32 is 32bit, so 32 is NOT a magic number. diff --git a/target-openrisc/interrupt_helper.c b/target-openrisc/interrupt_helper.c index 79f5afe..a176441 100644 --- a/target-openrisc/interrupt_helper.c +++ b/target-openrisc/interrupt_helper.c @@ -23,7 +23,7 @@ void HELPER(rfe)(CPUOpenRISCState *env) { - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); #ifndef CONFIG_USER_ONLY int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^ (cpu->env.esr & (SR_SM | SR_IME | SR_DME)); diff --git a/target-openrisc/mmu.c b/target-openrisc/mmu.c index 8364652..d354e1f 100644 --- a/target-openrisc/mmu.c +++ b/target-openrisc/mmu.c @@ -187,7 +187,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env, int ret = 0; hwaddr physical = 0; int prot = 0; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); ret = cpu_openrisc_get_phys_addr(cpu, &physical, &prot, address, rw); @@ -209,7 +209,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env, target_ulong address, int rw, int mmu_idx) { int ret = 0; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret); ret = 1; @@ -224,7 +224,7 @@ hwaddr cpu_get_phys_page_debug(CPUOpenRISCState *env, { hwaddr phys_addr; int prot; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); if (cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr, 0)) { return -1; diff --git a/target-openrisc/sys_helper.c b/target-openrisc/sys_helper.c index f160dc3..3c5f45a 100644 --- a/target-openrisc/sys_helper.c +++ b/target-openrisc/sys_helper.c @@ -30,7 +30,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, int spr = (ra | offset); int idx; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); switch (spr) { case TO_SPR(0, 0): /* VR */ @@ -177,7 +177,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, int spr = (ra | offset); int idx; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); switch (spr) { case TO_SPR(0, 0): /* VR */ diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 2f4f068..2c64c63 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -384,6 +384,11 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) #endif /* !defined (TARGET_PPC64) */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 4f767c9..e143af5 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -10578,6 +10578,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) pcc->parent_reset = 
cc->reset; cc->reset = ppc_cpu_reset; + + cc->class_by_name = ppc_cpu_class_by_name; } static const TypeInfo ppc_cpu_type_info = { diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs index e728abf..3afb0b7 100644 --- a/target-s390x/Makefile.objs +++ b/target-s390x/Makefile.objs @@ -1,4 +1,4 @@ obj-y += translate.o helper.o cpu.o interrupt.o obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o +obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o obj-$(CONFIG_KVM) += kvm.o diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 1f2d942..9be4a47 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -50,6 +50,11 @@ #define MMU_USER_IDX 1 #define MAX_EXT_QUEUE 16 +#define MAX_IO_QUEUE 16 +#define MAX_MCHK_QUEUE 16 + +#define PSW_MCHK_MASK 0x0004000000000000 +#define PSW_IO_MASK 0x0200000000000000 typedef struct PSW { uint64_t mask; @@ -62,6 +67,17 @@ typedef struct ExtQueue { uint32_t param64; } ExtQueue; +typedef struct IOIntQueue { + uint16_t id; + uint16_t nr; + uint32_t parm; + uint32_t word; +} IOIntQueue; + +typedef struct MchkQueue { + uint16_t type; +} MchkQueue; + typedef struct CPUS390XState { uint64_t regs[16]; /* GP registers */ CPU_DoubleU fregs[16]; /* FP registers */ @@ -93,9 +109,17 @@ typedef struct CPUS390XState { uint64_t cregs[16]; /* control registers */ ExtQueue ext_queue[MAX_EXT_QUEUE]; - int pending_int; + IOIntQueue io_queue[MAX_IO_QUEUE][8]; + MchkQueue mchk_queue[MAX_MCHK_QUEUE]; + int pending_int; int ext_index; + int io_index[8]; + int mchk_index; + + uint64_t ckc; + uint64_t cputm; + uint32_t todpr; CPU_COMMON @@ -123,6 +147,9 @@ static inline void cpu_clone_regs(CPUS390XState *env, target_ulong newsp) } #endif +/* distinguish between 24 bit and 31 bit addressing */ +#define HIGH_ORDER_BIT 0x80000000 + /* Interrupt Codes */ /* Program Interrupts */ #define PGM_OPERATION 0x0001 @@ -300,8 +327,27 @@ int cpu_s390x_handle_mmu_fault (CPUS390XState *env, target_ulong address, int rw int mmu_idx); #define cpu_handle_mmu_fault cpu_s390x_handle_mmu_fault +#include "ioinst.h" #ifndef CONFIG_USER_ONLY +void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len, + int is_write); +void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len, + int is_write); +static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb) +{ + hwaddr addr = 0; + uint8_t reg; + + reg = ipb >> 28; + if (reg > 0) { + addr = env->regs[reg]; + } + addr += (ipb >> 16) & 0xfff; + + return addr; +} + void s390x_tod_timer(void *opaque); void s390x_cpu_timer(void *opaque); @@ -351,6 +397,114 @@ static inline unsigned s390_del_running_cpu(CPUS390XState *env) void cpu_lock(void); void cpu_unlock(void); +typedef struct SubchDev SubchDev; + +#ifndef CONFIG_USER_ONLY +SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +bool css_subch_visible(SubchDev *sch); +void css_conditional_io_interrupt(SubchDev *sch); +int css_do_stsch(SubchDev *sch, SCHIB *schib); +bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid); +int css_do_msch(SubchDev *sch, SCHIB *schib); +int css_do_xsch(SubchDev *sch); +int css_do_csch(SubchDev *sch); +int css_do_hsch(SubchDev *sch); +int css_do_ssch(SubchDev *sch, ORB *orb); +int css_do_tsch(SubchDev *sch, IRB *irb); +int css_do_stcrw(CRW *crw); +int css_do_tpi(IOIntCode *int_code, int lowcore); +int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid, + int rfmt, void *buf); +void css_do_schm(uint8_t 
mbk, int update, int dct, uint64_t mbo); +int css_enable_mcsse(void); +int css_enable_mss(void); +int css_do_rsch(SubchDev *sch); +int css_do_rchp(uint8_t cssid, uint8_t chpid); +bool css_present(uint8_t cssid); +#else +static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid) +{ + return NULL; +} +static inline bool css_subch_visible(SubchDev *sch) +{ + return false; +} +static inline void css_conditional_io_interrupt(SubchDev *sch) +{ +} +static inline int css_do_stsch(SubchDev *sch, SCHIB *schib) +{ + return -ENODEV; +} +static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + return true; +} +static inline int css_do_msch(SubchDev *sch, SCHIB *schib) +{ + return -ENODEV; +} +static inline int css_do_xsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_csch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_hsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_ssch(SubchDev *sch, ORB *orb) +{ + return -ENODEV; +} +static inline int css_do_tsch(SubchDev *sch, IRB *irb) +{ + return -ENODEV; +} +static inline int css_do_stcrw(CRW *crw) +{ + return 1; +} +static inline int css_do_tpi(IOIntCode *int_code, int lowcore) +{ + return 0; +} +static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, + uint8_t l_chpid, int rfmt, void *buf) +{ + return 0; +} +static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo) +{ +} +static inline int css_enable_mss(void) +{ + return -EINVAL; +} +static inline int css_enable_mcsse(void) +{ + return -EINVAL; +} +static inline int css_do_rsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_rchp(uint8_t cssid, uint8_t chpid) +{ + return -ENODEV; +} +static inline bool css_present(uint8_t cssid) +{ + return false; +} +#endif + static inline void cpu_set_tls(CPUS390XState *env, target_ulong newtls) { env->aregs[0] = newtls >> 32; @@ -370,10 +524,14 @@ void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf); #define EXCP_EXT 1 /* external interrupt */ #define EXCP_SVC 2 /* supervisor call (syscall) */ #define EXCP_PGM 3 /* program interruption */ +#define EXCP_IO 7 /* I/O interrupt */ +#define EXCP_MCHK 8 /* machine check */ #define INTERRUPT_EXT (1 << 0) #define INTERRUPT_TOD (1 << 1) #define INTERRUPT_CPUTIMER (1 << 2) +#define INTERRUPT_IO (1 << 3) +#define INTERRUPT_MCHK (1 << 4) /* Program Status Word. */ #define S390_PSWM_REGNUM 0 @@ -836,6 +994,45 @@ static inline void cpu_inject_ext(CPUS390XState *env, uint32_t code, uint32_t pa cpu_interrupt(env, CPU_INTERRUPT_HARD); } +static inline void cpu_inject_io(CPUS390XState *env, uint16_t subchannel_id, + uint16_t subchannel_number, + uint32_t io_int_parm, uint32_t io_int_word) +{ + int isc = ffs(io_int_word << 2) - 1; + + if (env->io_index[isc] == MAX_IO_QUEUE - 1) { + /* ugh - can't queue anymore. Let's drop. */ + return; + } + + env->io_index[isc]++; + assert(env->io_index[isc] < MAX_IO_QUEUE); + + env->io_queue[env->io_index[isc]][isc].id = subchannel_id; + env->io_queue[env->io_index[isc]][isc].nr = subchannel_number; + env->io_queue[env->io_index[isc]][isc].parm = io_int_parm; + env->io_queue[env->io_index[isc]][isc].word = io_int_word; + + env->pending_int |= INTERRUPT_IO; + cpu_interrupt(env, CPU_INTERRUPT_HARD); +} + +static inline void cpu_inject_crw_mchk(CPUS390XState *env) +{ + if (env->mchk_index == MAX_MCHK_QUEUE - 1) { + /* ugh - can't queue anymore. Let's drop.
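+ * (illustrative note, not from the original patch: once mchk_index
+ * reaches MAX_MCHK_QUEUE - 1 the queue is full, so further CRW
+ * machine checks are silently discarded rather than queued)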
*/ + return; + } + + env->mchk_index++; + assert(env->mchk_index < MAX_MCHK_QUEUE); + + env->mchk_queue[env->mchk_index].type = 1; + + env->pending_int |= INTERRUPT_MCHK; + cpu_interrupt(env, CPU_INTERRUPT_HARD); +} + static inline bool cpu_has_work(CPUState *cpu) { CPUS390XState *env = &S390_CPU(cpu)->env; @@ -859,4 +1056,52 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen); void QEMU_NORETURN runtime_exception(CPUS390XState *env, int excp, uintptr_t retaddr); +#include <sysemu/kvm.h> + +#ifdef CONFIG_KVM +void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id, + uint16_t subchannel_nr, uint32_t io_int_parm, + uint32_t io_int_word); +void kvm_s390_crw_mchk(S390CPU *cpu); +void kvm_s390_enable_css_support(S390CPU *cpu); +#else +static inline void kvm_s390_io_interrupt(S390CPU *cpu, + uint16_t subchannel_id, + uint16_t subchannel_nr, + uint32_t io_int_parm, + uint32_t io_int_word) +{ +} +static inline void kvm_s390_crw_mchk(S390CPU *cpu) +{ +} +static inline void kvm_s390_enable_css_support(S390CPU *cpu) +{ +} +#endif + +static inline void s390_io_interrupt(S390CPU *cpu, + uint16_t subchannel_id, + uint16_t subchannel_nr, + uint32_t io_int_parm, + uint32_t io_int_word) +{ + if (kvm_enabled()) { + kvm_s390_io_interrupt(cpu, subchannel_id, subchannel_nr, io_int_parm, + io_int_word); + } else { + cpu_inject_io(&cpu->env, subchannel_id, subchannel_nr, io_int_parm, + io_int_word); + } +} + +static inline void s390_crw_mchk(S390CPU *cpu) +{ + if (kvm_enabled()) { + kvm_s390_crw_mchk(cpu); + } else { + cpu_inject_crw_mchk(&cpu->env); + } +} + #endif diff --git a/target-s390x/helper.c b/target-s390x/helper.c index 9a132e6..857c897 100644 --- a/target-s390x/helper.c +++ b/target-s390x/helper.c @@ -471,13 +471,56 @@ static uint64_t get_psw_mask(CPUS390XState *env) return r; } +static LowCore *cpu_map_lowcore(CPUS390XState *env) +{ + LowCore *lowcore; + hwaddr len = sizeof(LowCore); + + lowcore = cpu_physical_memory_map(env->psa, &len, 1); + + if (len < sizeof(LowCore)) { + cpu_abort(env, "Could not map lowcore\n"); + } + + return lowcore; +} + +static void cpu_unmap_lowcore(LowCore *lowcore) +{ + cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore)); +} + +void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len, + int is_write) +{ + hwaddr start = addr; + + /* Mind the prefix area. */ + if (addr < 8192) { + /* Map the lowcore. */ + start += env->psa; + *len = MIN(*len, 8192 - addr); + } else if ((addr >= env->psa) && (addr < env->psa + 8192)) { + /* Map the 0 page. 
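+ * (illustrative note, not from the original patch: this mirrors s390
+ * prefixing, under which the 8KB at absolute address 0 and the 8KB at
+ * the prefix address are swapped; guest addresses in [psa, psa + 8192)
+ * reach absolute [0, 8192), just as the lowcore branch above maps
+ * [0, 8192) onto the prefix area)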
*/ + start -= env->psa; + *len = MIN(*len, 8192 - start); + } + + return cpu_physical_memory_map(start, len, is_write); +} + +void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len, + int is_write) +{ + cpu_physical_memory_unmap(addr, len, is_write, len); +} + static void do_svc_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->svc_code = cpu_to_be16(env->int_svc_code); lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen); @@ -486,7 +529,7 @@ static void do_svc_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->svc_new_psw.mask); addr = be64_to_cpu(lowcore->svc_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); load_psw(env, mask, addr); } @@ -495,7 +538,6 @@ static void do_program_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; int ilen = env->int_pgm_ilen; switch (ilen) { @@ -513,7 +555,7 @@ static void do_program_interrupt(CPUS390XState *env) qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n", __func__, env->int_pgm_code, ilen); - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->pgm_ilen = cpu_to_be16(ilen); lowcore->pgm_code = cpu_to_be16(env->int_pgm_code); @@ -522,7 +564,7 @@ static void do_program_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->program_new_psw.mask); addr = be64_to_cpu(lowcore->program_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__, env->int_pgm_code, ilen, env->psw.mask, @@ -537,7 +579,6 @@ static void do_ext_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; ExtQueue *q; if (!(env->psw.mask & PSW_MASK_EXT)) { @@ -549,7 +590,7 @@ static void do_ext_interrupt(CPUS390XState *env) } q = &env->ext_queue[env->ext_index]; - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->ext_int_code = cpu_to_be16(q->code); lowcore->ext_params = cpu_to_be32(q->param); @@ -560,7 +601,7 @@ static void do_ext_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->external_new_psw.mask); addr = be64_to_cpu(lowcore->external_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); env->ext_index--; if (env->ext_index == -1) { @@ -573,12 +614,140 @@ static void do_ext_interrupt(CPUS390XState *env) load_psw(env, mask, addr); } +static void do_io_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + IOIntQueue *q; + uint8_t isc; + int disable = 1; + int found = 0; + + if (!(env->psw.mask & PSW_MASK_IO)) { + cpu_abort(env, "I/O int w/o I/O mask\n"); + } + + for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) { + if (env->io_index[isc] < 0) { + continue; + } + if (env->io_index[isc] > MAX_IO_QUEUE) { + cpu_abort(env, "I/O queue overrun for isc %d: %d\n", + isc, env->io_index[isc]); + } + + q = &env->io_queue[env->io_index[isc]][isc]; + if (!(env->cregs[6] & q->word)) { + disable = 0; + continue; + } + found = 1; + lowcore = cpu_map_lowcore(env); + + lowcore->subchannel_id = cpu_to_be16(q->id); + lowcore->subchannel_nr = cpu_to_be16(q->nr); + lowcore->io_int_parm = cpu_to_be32(q->parm); + lowcore->io_int_word = cpu_to_be32(q->word); + lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env)); 
+ lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->io_new_psw.mask); + addr = be64_to_cpu(lowcore->io_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + env->io_index[isc]--; + if (env->io_index[isc] >= 0) { + disable = 0; + } + break; + } + + if (disable) { + env->pending_int &= ~INTERRUPT_IO; + } + + if (found) { + DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, + env->psw.mask, env->psw.addr); + load_psw(env, mask, addr); + } +} + +static void do_mchk_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + MchkQueue *q; + int i; + + if (!(env->psw.mask & PSW_MASK_MCHECK)) { + cpu_abort(env, "Machine check w/o mchk mask\n"); + } + + if (env->mchk_index < 0 || env->mchk_index > MAX_MCHK_QUEUE) { + cpu_abort(env, "Mchk queue overrun: %d\n", env->mchk_index); + } + + q = &env->mchk_queue[env->mchk_index]; + + if (q->type != 1) { + /* Don't know how to handle this... */ + cpu_abort(env, "Unknown machine check type %d\n", q->type); + } + if (!(env->cregs[14] & (1 << 28))) { + /* CRW machine checks disabled */ + return; + } + + lowcore = cpu_map_lowcore(env); + + for (i = 0; i < 16; i++) { + lowcore->floating_pt_save_area[i] = cpu_to_be64(env->fregs[i].ll); + lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]); + lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]); + lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]); + } + lowcore->prefixreg_save_area = cpu_to_be32(env->psa); + lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc); + lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr); + lowcore->cpu_timer_save_area[0] = cpu_to_be32(env->cputm >> 32); + lowcore->cpu_timer_save_area[1] = cpu_to_be32((uint32_t)env->cputm); + lowcore->clock_comp_save_area[0] = cpu_to_be32(env->ckc >> 32); + lowcore->clock_comp_save_area[1] = cpu_to_be32((uint32_t)env->ckc); + + lowcore->mcck_interruption_code[0] = cpu_to_be32(0x00400f1d); + lowcore->mcck_interruption_code[1] = cpu_to_be32(0x40330000); + lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env)); + lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->mcck_new_psw.mask); + addr = be64_to_cpu(lowcore->mcck_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + env->mchk_index--; + if (env->mchk_index == -1) { + env->pending_int &= ~INTERRUPT_MCHK; + } + + DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, + env->psw.mask, env->psw.addr); + + load_psw(env, mask, addr); +} + void do_interrupt(CPUS390XState *env) { qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n", __func__, env->exception_index, env->psw.addr); s390_add_running_cpu(env); + /* handle machine checks */ + if ((env->psw.mask & PSW_MASK_MCHECK) && + (env->exception_index == -1)) { + if (env->pending_int & INTERRUPT_MCHK) { + env->exception_index = EXCP_MCHK; + } + } /* handle external interrupts */ if ((env->psw.mask & PSW_MASK_EXT) && env->exception_index == -1) { @@ -597,6 +766,13 @@ void do_interrupt(CPUS390XState *env) env->pending_int &= ~INTERRUPT_TOD; } } + /* handle I/O interrupts */ + if ((env->psw.mask & PSW_MASK_IO) && + (env->exception_index == -1)) { + if (env->pending_int & INTERRUPT_IO) { + env->exception_index = EXCP_IO; + } + } switch (env->exception_index) { case EXCP_PGM: @@ -608,6 +784,12 @@ void do_interrupt(CPUS390XState *env) case EXCP_EXT: do_ext_interrupt(env); break; + case EXCP_IO: + do_io_interrupt(env); + break; + case EXCP_MCHK: + do_mchk_interrupt(env); + break; } env->exception_index = -1; diff --git a/target-s390x/ioinst.c 
b/target-s390x/ioinst.c new file mode 100644 index 0000000..e3531f3 --- /dev/null +++ b/target-s390x/ioinst.c @@ -0,0 +1,761 @@ +/* + * I/O instructions for S/390 + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include <sys/types.h> + +#include "cpu.h" +#include "ioinst.h" +#include "trace.h" + +int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid) +{ + if (!IOINST_SCHID_ONE(value)) { + return -EINVAL; + } + if (!IOINST_SCHID_M(value)) { + if (IOINST_SCHID_CSSID(value)) { + return -EINVAL; + } + *cssid = 0; + *m = 0; + } else { + *cssid = IOINST_SCHID_CSSID(value); + *m = 1; + } + *ssid = IOINST_SCHID_SSID(value); + *schid = IOINST_SCHID_NR(value); + return 0; +} + +int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("xsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_xsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; +} + +int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("csch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_csch(sch); + } + if (ret == -ENODEV) { + cc = 3; + } else { + cc = 0; + } + return cc; +} + +int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("hsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_hsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; +} + +static int ioinst_schib_valid(SCHIB *schib) +{ + if ((schib->pmcw.flags & PMCW_FLAGS_MASK_INVALID) || + (schib->pmcw.chars & PMCW_CHARS_MASK_INVALID)) { + return 0; + } + /* Disallow extended measurements for now. 
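+ Accepting PMCW_CHARS_MASK_XMWME would presumably oblige us to
+ maintain the extended measurement word as well, which is not
+ implemented.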
*/ + if (schib->pmcw.chars & PMCW_CHARS_MASK_XMWME) { + return 0; + } + return 1; +} + +int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + SCHIB *schib; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*schib); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("msch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + schib = s390_cpu_physical_memory_map(env, addr, &len, 0); + if (!schib || len != sizeof(*schib)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + if (!ioinst_schib_valid(schib)) { + program_interrupt(env, PGM_OPERAND, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_msch(sch, schib); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } +out: + s390_cpu_physical_memory_unmap(env, schib, len, 0); + return cc; +} + +static void copy_orb_from_guest(ORB *dest, const ORB *src) +{ + dest->intparm = be32_to_cpu(src->intparm); + dest->ctrl0 = be16_to_cpu(src->ctrl0); + dest->lpm = src->lpm; + dest->ctrl1 = src->ctrl1; + dest->cpa = be32_to_cpu(src->cpa); +} + +static int ioinst_orb_valid(ORB *orb) +{ + if ((orb->ctrl0 & ORB_CTRL0_MASK_INVALID) || + (orb->ctrl1 & ORB_CTRL1_MASK_INVALID)) { + return 0; + } + if ((orb->cpa & HIGH_ORDER_BIT) != 0) { + return 0; + } + return 1; +} + +int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + ORB *orig_orb, orb; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*orig_orb); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("ssch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + orig_orb = s390_cpu_physical_memory_map(env, addr, &len, 0); + if (!orig_orb || len != sizeof(*orig_orb)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + copy_orb_from_guest(&orb, orig_orb); + if (!ioinst_orb_valid(&orb)) { + program_interrupt(env, PGM_OPERAND, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_ssch(sch, &orb); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + +out: + s390_cpu_physical_memory_unmap(env, orig_orb, len, 0); + return cc; +} + +int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb) +{ + CRW *crw; + uint64_t addr; + int cc; + hwaddr len = sizeof(*crw); + + addr = decode_basedisp_s(env, ipb); + crw = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!crw || len != sizeof(*crw)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + cc = css_do_stcrw(crw); + /* 0 - crw stored, 1 - zeroes stored */ +out: + s390_cpu_physical_memory_unmap(env, crw, len, 1); + return cc; +} + +int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + uint64_t addr; + int cc; + SCHIB *schib; + hwaddr len = sizeof(*schib); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 
2); + return -EIO; + } + trace_ioinst_sch_id("stsch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + schib = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!schib || len != sizeof(*schib)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch) { + if (css_subch_visible(sch)) { + css_do_stsch(sch, schib); + cc = 0; + } else { + /* Indicate no more subchannels in this css/ss */ + cc = 3; + } + } else { + if (css_schid_final(cssid, ssid, schid)) { + cc = 3; /* No more subchannels in this css/ss */ + } else { + /* Store an empty schib. */ + memset(schib, 0, sizeof(*schib)); + cc = 0; + } + } +out: + s390_cpu_physical_memory_unmap(env, schib, len, 1); + return cc; +} + +int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + IRB *irb; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*irb); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("tsch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + irb = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!irb || len != sizeof(*irb)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_tsch(sch, irb); + /* 0 - status pending, 1 - not status pending */ + cc = ret; + } else { + cc = 3; + } +out: + s390_cpu_physical_memory_unmap(env, irb, sizeof(*irb), 1); + return cc; +} + +typedef struct ChscReq { + uint16_t len; + uint16_t command; + uint32_t param0; + uint32_t param1; + uint32_t param2; +} QEMU_PACKED ChscReq; + +typedef struct ChscResp { + uint16_t len; + uint16_t code; + uint32_t param; + char data[0]; +} QEMU_PACKED ChscResp; + +#define CHSC_MIN_RESP_LEN 0x0008 + +#define CHSC_SCPD 0x0002 +#define CHSC_SCSC 0x0010 +#define CHSC_SDA 0x0031 + +#define CHSC_SCPD_0_M 0x20000000 +#define CHSC_SCPD_0_C 0x10000000 +#define CHSC_SCPD_0_FMT 0x0f000000 +#define CHSC_SCPD_0_CSSID 0x00ff0000 +#define CHSC_SCPD_0_RFMT 0x00000f00 +#define CHSC_SCPD_0_RES 0xc000f000 +#define CHSC_SCPD_1_RES 0xffffff00 +#define CHSC_SCPD_01_CHPID 0x000000ff +static void ioinst_handle_chsc_scpd(ChscReq *req, ChscResp *res) +{ + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint32_t param1 = be32_to_cpu(req->param1); + uint16_t resp_code; + int rfmt; + uint16_t cssid; + uint8_t f_chpid, l_chpid; + int desc_size; + int m; + + rfmt = (param0 & CHSC_SCPD_0_RFMT) >> 8; + if ((rfmt == 0) || (rfmt == 1)) { + rfmt = !!(param0 & CHSC_SCPD_0_C); + } + if ((len != 0x0010) || (param0 & CHSC_SCPD_0_RES) || + (param1 & CHSC_SCPD_1_RES) || req->param2) { + resp_code = 0x0003; + goto out_err; + } + if (param0 & CHSC_SCPD_0_FMT) { + resp_code = 0x0007; + goto out_err; + } + cssid = (param0 & CHSC_SCPD_0_CSSID) >> 16; + m = param0 & CHSC_SCPD_0_M; + if (cssid != 0) { + if (!m || !css_present(cssid)) { + resp_code = 0x0008; + goto out_err; + } + } + f_chpid = param0 & CHSC_SCPD_01_CHPID; + l_chpid = param1 & CHSC_SCPD_01_CHPID; + if (l_chpid < f_chpid) { + resp_code = 0x0003; + goto out_err; + } + /* css_collect_chp_desc() is endian-aware */ + desc_size = css_collect_chp_desc(m, cssid, f_chpid, l_chpid, rfmt, + &res->data); + res->code = cpu_to_be16(0x0001); + res->len = cpu_to_be16(8 + desc_size); + res->param = 
cpu_to_be32(rfmt); + return; + + out_err: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = cpu_to_be32(rfmt); +} + +#define CHSC_SCSC_0_M 0x20000000 +#define CHSC_SCSC_0_FMT 0x000f0000 +#define CHSC_SCSC_0_CSSID 0x0000ff00 +#define CHSC_SCSC_0_RES 0xdff000ff +static void ioinst_handle_chsc_scsc(ChscReq *req, ChscResp *res) +{ + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint8_t cssid; + uint16_t resp_code; + uint32_t general_chars[510]; + uint32_t chsc_chars[508]; + + if (len != 0x0010) { + resp_code = 0x0003; + goto out_err; + } + + if (param0 & CHSC_SCSC_0_FMT) { + resp_code = 0x0007; + goto out_err; + } + cssid = (param0 & CHSC_SCSC_0_CSSID) >> 8; + if (cssid != 0) { + if (!(param0 & CHSC_SCSC_0_M) || !css_present(cssid)) { + resp_code = 0x0008; + goto out_err; + } + } + if ((param0 & CHSC_SCSC_0_RES) || req->param1 || req->param2) { + resp_code = 0x0003; + goto out_err; + } + res->code = cpu_to_be16(0x0001); + res->len = cpu_to_be16(4080); + res->param = 0; + + memset(general_chars, 0, sizeof(general_chars)); + memset(chsc_chars, 0, sizeof(chsc_chars)); + + general_chars[0] = cpu_to_be32(0x03000000); + general_chars[1] = cpu_to_be32(0x00059000); + + chsc_chars[0] = cpu_to_be32(0x40000000); + chsc_chars[3] = cpu_to_be32(0x00040000); + + memcpy(res->data, general_chars, sizeof(general_chars)); + memcpy(res->data + sizeof(general_chars), chsc_chars, sizeof(chsc_chars)); + return; + + out_err: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = 0; +} + +#define CHSC_SDA_0_FMT 0x0f000000 +#define CHSC_SDA_0_OC 0x0000ffff +#define CHSC_SDA_0_RES 0xf0ff0000 +#define CHSC_SDA_OC_MCSSE 0x0 +#define CHSC_SDA_OC_MSS 0x2 +static void ioinst_handle_chsc_sda(ChscReq *req, ChscResp *res) +{ + uint16_t resp_code = 0x0001; + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint16_t oc; + int ret; + + if ((len != 0x0400) || (param0 & CHSC_SDA_0_RES)) { + resp_code = 0x0003; + goto out; + } + + if (param0 & CHSC_SDA_0_FMT) { + resp_code = 0x0007; + goto out; + } + + oc = param0 & CHSC_SDA_0_OC; + switch (oc) { + case CHSC_SDA_OC_MCSSE: + ret = css_enable_mcsse(); + if (ret == -EINVAL) { + resp_code = 0x0101; + goto out; + } + break; + case CHSC_SDA_OC_MSS: + ret = css_enable_mss(); + if (ret == -EINVAL) { + resp_code = 0x0101; + goto out; + } + break; + default: + resp_code = 0x0003; + goto out; + } + +out: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = 0; +} + +static void ioinst_handle_chsc_unimplemented(ChscResp *res) +{ + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->code = cpu_to_be16(0x0004); + res->param = 0; +} + +int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb) +{ + ChscReq *req; + ChscResp *res; + uint64_t addr; + int reg; + uint16_t len; + uint16_t command; + hwaddr map_size = TARGET_PAGE_SIZE; + int ret = 0; + + trace_ioinst("chsc"); + reg = (ipb >> 20) & 0x00f; + addr = env->regs[reg]; + /* Page boundary? */ + if (addr & 0xfff) { + program_interrupt(env, PGM_SPECIFICATION, 2); + return -EIO; + } + req = s390_cpu_physical_memory_map(env, addr, &map_size, 1); + if (!req || map_size != TARGET_PAGE_SIZE) { + program_interrupt(env, PGM_SPECIFICATION, 2); + ret = -EIO; + goto out; + } + len = be16_to_cpu(req->len); + /* Length field valid? 
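+ A request must be 16..4088 bytes long and a multiple of 8; the 4088
+ cap also leaves at least CHSC_MIN_RESP_LEN bytes of the mapped page
+ for the response block placed at req + len below.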
*/ + if ((len < 16) || (len > 4088) || (len & 7)) { + program_interrupt(env, PGM_OPERAND, 2); + ret = -EIO; + goto out; + } + memset((char *)req + len, 0, TARGET_PAGE_SIZE - len); + res = (void *)((char *)req + len); + command = be16_to_cpu(req->command); + trace_ioinst_chsc_cmd(command, len); + switch (command) { + case CHSC_SCSC: + ioinst_handle_chsc_scsc(req, res); + break; + case CHSC_SCPD: + ioinst_handle_chsc_scpd(req, res); + break; + case CHSC_SDA: + ioinst_handle_chsc_sda(req, res); + break; + default: + ioinst_handle_chsc_unimplemented(res); + break; + } + +out: + s390_cpu_physical_memory_unmap(env, req, map_size, 1); + return ret; +} + +int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb) +{ + uint64_t addr; + int lowcore; + IOIntCode *int_code; + hwaddr len, orig_len; + int ret; + + trace_ioinst("tpi"); + addr = decode_basedisp_s(env, ipb); + lowcore = addr ? 0 : 1; + len = lowcore ? 8 /* two words */ : 12 /* three words */; + orig_len = len; + int_code = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!int_code || (len != orig_len)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + ret = -EIO; + goto out; + } + ret = css_do_tpi(int_code, lowcore); +out: + s390_cpu_physical_memory_unmap(env, int_code, len, 1); + return ret; +} + +#define SCHM_REG1_RES(_reg) (_reg & 0x000000000ffffffc) +#define SCHM_REG1_MBK(_reg) ((_reg & 0x00000000f0000000) >> 28) +#define SCHM_REG1_UPD(_reg) ((_reg & 0x0000000000000002) >> 1) +#define SCHM_REG1_DCT(_reg) (_reg & 0x0000000000000001) + +int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2, + uint32_t ipb) +{ + uint8_t mbk; + int update; + int dct; + + trace_ioinst("schm"); + + if (SCHM_REG1_RES(reg1)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + mbk = SCHM_REG1_MBK(reg1); + update = SCHM_REG1_UPD(reg1); + dct = SCHM_REG1_DCT(reg1); + + if (update && (reg2 & 0x0000000000000fff)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + css_do_schm(mbk, update, dct, update ? reg2 : 0); + + return 0; +} + +int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("rsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_rsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EINVAL: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; + +} + +#define RCHP_REG1_RES(_reg) (_reg & 0x00000000ff00ff00) +#define RCHP_REG1_CSSID(_reg) ((_reg & 0x0000000000ff0000) >> 16) +#define RCHP_REG1_CHPID(_reg) (_reg & 0x00000000000000ff) +int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1) +{ + int cc; + uint8_t cssid; + uint8_t chpid; + int ret; + + if (RCHP_REG1_RES(reg1)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + cssid = RCHP_REG1_CSSID(reg1); + chpid = RCHP_REG1_CHPID(reg1); + + trace_ioinst_chp_id("rchp", cssid, chpid); + + ret = css_do_rchp(cssid, chpid); + + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + /* Invalid channel subsystem. 
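+ Any css_do_rchp result other than 0, -ENODEV or -EBUSY means the
+ cssid itself was rejected, which rates an operand exception rather
+ than a condition code.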
*/ + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + return cc; +} + +#define SAL_REG1_INVALID(_reg) (_reg & 0x0000000080000000) +int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1) +{ + /* We do not provide address limit checking, so let's suppress it. */ + if (SAL_REG1_INVALID(reg1) || reg1 & 0x000000000000ffff) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + return 0; +} diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h new file mode 100644 index 0000000..d5a43f4 --- /dev/null +++ b/target-s390x/ioinst.h @@ -0,0 +1,230 @@ +/* + * S/390 channel I/O instructions + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. +*/ + +#ifndef IOINST_S390X_H +#define IOINST_S390X_H +/* + * Channel I/O related definitions, as defined in the Principles + * Of Operation (and taken from the Linux implementation). + */ + +/* subchannel status word (command mode only) */ +typedef struct SCSW { + uint16_t flags; + uint16_t ctrl; + uint32_t cpa; + uint8_t dstat; + uint8_t cstat; + uint16_t count; +} QEMU_PACKED SCSW; + +#define SCSW_FLAGS_MASK_KEY 0xf000 +#define SCSW_FLAGS_MASK_SCTL 0x0800 +#define SCSW_FLAGS_MASK_ESWF 0x0400 +#define SCSW_FLAGS_MASK_CC 0x0300 +#define SCSW_FLAGS_MASK_FMT 0x0080 +#define SCSW_FLAGS_MASK_PFCH 0x0040 +#define SCSW_FLAGS_MASK_ISIC 0x0020 +#define SCSW_FLAGS_MASK_ALCC 0x0010 +#define SCSW_FLAGS_MASK_SSI 0x0008 +#define SCSW_FLAGS_MASK_ZCC 0x0004 +#define SCSW_FLAGS_MASK_ECTL 0x0002 +#define SCSW_FLAGS_MASK_PNO 0x0001 + +#define SCSW_CTRL_MASK_FCTL 0x7000 +#define SCSW_CTRL_MASK_ACTL 0x0fe0 +#define SCSW_CTRL_MASK_STCTL 0x001f + +#define SCSW_FCTL_CLEAR_FUNC 0x1000 +#define SCSW_FCTL_HALT_FUNC 0x2000 +#define SCSW_FCTL_START_FUNC 0x4000 + +#define SCSW_ACTL_SUSP 0x0020 +#define SCSW_ACTL_DEVICE_ACTIVE 0x0040 +#define SCSW_ACTL_SUBCH_ACTIVE 0x0080 +#define SCSW_ACTL_CLEAR_PEND 0x0100 +#define SCSW_ACTL_HALT_PEND 0x0200 +#define SCSW_ACTL_START_PEND 0x0400 +#define SCSW_ACTL_RESUME_PEND 0x0800 + +#define SCSW_STCTL_STATUS_PEND 0x0001 +#define SCSW_STCTL_SECONDARY 0x0002 +#define SCSW_STCTL_PRIMARY 0x0004 +#define SCSW_STCTL_INTERMEDIATE 0x0008 +#define SCSW_STCTL_ALERT 0x0010 + +#define SCSW_DSTAT_ATTENTION 0x80 +#define SCSW_DSTAT_STAT_MOD 0x40 +#define SCSW_DSTAT_CU_END 0x20 +#define SCSW_DSTAT_BUSY 0x10 +#define SCSW_DSTAT_CHANNEL_END 0x08 +#define SCSW_DSTAT_DEVICE_END 0x04 +#define SCSW_DSTAT_UNIT_CHECK 0x02 +#define SCSW_DSTAT_UNIT_EXCEP 0x01 + +#define SCSW_CSTAT_PCI 0x80 +#define SCSW_CSTAT_INCORR_LEN 0x40 +#define SCSW_CSTAT_PROG_CHECK 0x20 +#define SCSW_CSTAT_PROT_CHECK 0x10 +#define SCSW_CSTAT_DATA_CHECK 0x08 +#define SCSW_CSTAT_CHN_CTRL_CHK 0x04 +#define SCSW_CSTAT_INTF_CTRL_CHK 0x02 +#define SCSW_CSTAT_CHAIN_CHECK 0x01 + +/* path management control word */ +typedef struct PMCW { + uint32_t intparm; + uint16_t flags; + uint16_t devno; + uint8_t lpm; + uint8_t pnom; + uint8_t lpum; + uint8_t pim; + uint16_t mbi; + uint8_t pom; + uint8_t pam; + uint8_t chpid[8]; + uint32_t chars; +} QEMU_PACKED PMCW; + +#define PMCW_FLAGS_MASK_QF 0x8000 +#define PMCW_FLAGS_MASK_W 0x4000 +#define PMCW_FLAGS_MASK_ISC 0x3800 +#define PMCW_FLAGS_MASK_ENA 0x0080 +#define PMCW_FLAGS_MASK_LM 0x0060 +#define PMCW_FLAGS_MASK_MME 0x0018 +#define PMCW_FLAGS_MASK_MP 0x0004 +#define PMCW_FLAGS_MASK_TF 0x0002 +#define PMCW_FLAGS_MASK_DNV 0x0001 +#define 
PMCW_FLAGS_MASK_INVALID 0x0700 + +#define PMCW_CHARS_MASK_ST 0x00e00000 +#define PMCW_CHARS_MASK_MBFC 0x00000004 +#define PMCW_CHARS_MASK_XMWME 0x00000002 +#define PMCW_CHARS_MASK_CSENSE 0x00000001 +#define PMCW_CHARS_MASK_INVALID 0xff1ffff8 + +/* subchannel information block */ +typedef struct SCHIB { + PMCW pmcw; + SCSW scsw; + uint64_t mba; + uint8_t mda[4]; +} QEMU_PACKED SCHIB; + +/* interruption response block */ +typedef struct IRB { + SCSW scsw; + uint32_t esw[5]; + uint32_t ecw[8]; + uint32_t emw[8]; +} QEMU_PACKED IRB; + +/* operation request block */ +typedef struct ORB { + uint32_t intparm; + uint16_t ctrl0; + uint8_t lpm; + uint8_t ctrl1; + uint32_t cpa; +} QEMU_PACKED ORB; + +#define ORB_CTRL0_MASK_KEY 0xf000 +#define ORB_CTRL0_MASK_SPND 0x0800 +#define ORB_CTRL0_MASK_STR 0x0400 +#define ORB_CTRL0_MASK_MOD 0x0200 +#define ORB_CTRL0_MASK_SYNC 0x0100 +#define ORB_CTRL0_MASK_FMT 0x0080 +#define ORB_CTRL0_MASK_PFCH 0x0040 +#define ORB_CTRL0_MASK_ISIC 0x0020 +#define ORB_CTRL0_MASK_ALCC 0x0010 +#define ORB_CTRL0_MASK_SSIC 0x0008 +#define ORB_CTRL0_MASK_C64 0x0002 +#define ORB_CTRL0_MASK_I2K 0x0001 +#define ORB_CTRL0_MASK_INVALID 0x0004 + +#define ORB_CTRL1_MASK_ILS 0x80 +#define ORB_CTRL1_MASK_MIDAW 0x40 +#define ORB_CTRL1_MASK_ORBX 0x01 +#define ORB_CTRL1_MASK_INVALID 0x3e + +/* channel command word (type 1) */ +typedef struct CCW1 { + uint8_t cmd_code; + uint8_t flags; + uint16_t count; + uint32_t cda; +} QEMU_PACKED CCW1; + +#define CCW_FLAG_DC 0x80 +#define CCW_FLAG_CC 0x40 +#define CCW_FLAG_SLI 0x20 +#define CCW_FLAG_SKIP 0x10 +#define CCW_FLAG_PCI 0x08 +#define CCW_FLAG_IDA 0x04 +#define CCW_FLAG_SUSPEND 0x02 + +#define CCW_CMD_NOOP 0x03 +#define CCW_CMD_BASIC_SENSE 0x04 +#define CCW_CMD_TIC 0x08 +#define CCW_CMD_SENSE_ID 0xe4 + +typedef struct CRW { + uint16_t flags; + uint16_t rsid; +} QEMU_PACKED CRW; + +#define CRW_FLAGS_MASK_S 0x4000 +#define CRW_FLAGS_MASK_R 0x2000 +#define CRW_FLAGS_MASK_C 0x1000 +#define CRW_FLAGS_MASK_RSC 0x0f00 +#define CRW_FLAGS_MASK_A 0x0080 +#define CRW_FLAGS_MASK_ERC 0x003f + +#define CRW_ERC_INIT 0x02 +#define CRW_ERC_IPI 0x04 + +#define CRW_RSC_SUBCH 0x3 +#define CRW_RSC_CHP 0x4 + +/* I/O interruption code */ +typedef struct IOIntCode { + uint32_t subsys_id; + uint32_t intparm; + uint32_t interrupt_id; +} QEMU_PACKED IOIntCode; + +/* schid disintegration */ +#define IOINST_SCHID_ONE(_schid) ((_schid & 0x00010000) >> 16) +#define IOINST_SCHID_M(_schid) ((_schid & 0x00080000) >> 19) +#define IOINST_SCHID_CSSID(_schid) ((_schid & 0xff000000) >> 24) +#define IOINST_SCHID_SSID(_schid) ((_schid & 0x00060000) >> 17) +#define IOINST_SCHID_NR(_schid) (_schid & 0x0000ffff) + +int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid); +int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2, + uint32_t ipb); +int ioinst_handle_rsch(CPUS390XState *env, uint64_t 
reg1); +int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1); + +#endif diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index add6a58..2c24182 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -47,9 +47,29 @@ #define IPA0_DIAG 0x8300 #define IPA0_SIGP 0xae00 -#define IPA0_PRIV 0xb200 +#define IPA0_B2 0xb200 +#define IPA0_B9 0xb900 +#define IPA0_EB 0xeb00 #define PRIV_SCLP_CALL 0x20 +#define PRIV_CSCH 0x30 +#define PRIV_HSCH 0x31 +#define PRIV_MSCH 0x32 +#define PRIV_SSCH 0x33 +#define PRIV_STSCH 0x34 +#define PRIV_TSCH 0x35 +#define PRIV_TPI 0x36 +#define PRIV_SAL 0x37 +#define PRIV_RSCH 0x38 +#define PRIV_STCRW 0x39 +#define PRIV_STCPS 0x3a +#define PRIV_RCHP 0x3b +#define PRIV_SCHM 0x3c +#define PRIV_CHSC 0x5f +#define PRIV_SIGA 0x74 +#define PRIV_XSCH 0x76 +#define PRIV_SQBS 0x8a +#define PRIV_EQBS 0x9c #define DIAG_KVM_HYPERCALL 0x500 #define DIAG_KVM_BREAKPOINT 0x501 @@ -76,6 +96,11 @@ int kvm_arch_init(KVMState *s) return 0; } +unsigned long kvm_arch_vcpu_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + int kvm_arch_init_vcpu(CPUState *cpu) { int ret = 0; @@ -375,10 +400,123 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, return 0; } -static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) +static int kvm_handle_css_inst(S390CPU *cpu, struct kvm_run *run, + uint8_t ipa0, uint8_t ipa1, uint8_t ipb) +{ + int r = 0; + int no_cc = 0; + CPUS390XState *env = &cpu->env; + + if (ipa0 != 0xb2) { + /* Not handled for now. */ + return -1; + } + cpu_synchronize_state(env); + switch (ipa1) { + case PRIV_XSCH: + r = ioinst_handle_xsch(env, env->regs[1]); + break; + case PRIV_CSCH: + r = ioinst_handle_csch(env, env->regs[1]); + break; + case PRIV_HSCH: + r = ioinst_handle_hsch(env, env->regs[1]); + break; + case PRIV_MSCH: + r = ioinst_handle_msch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_SSCH: + r = ioinst_handle_ssch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_STCRW: + r = ioinst_handle_stcrw(env, run->s390_sieic.ipb); + break; + case PRIV_STSCH: + r = ioinst_handle_stsch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_TSCH: + /* We should only get tsch via KVM_EXIT_S390_TSCH. */ + fprintf(stderr, "Spurious tsch intercept\n"); + break; + case PRIV_CHSC: + r = ioinst_handle_chsc(env, run->s390_sieic.ipb); + break; + case PRIV_TPI: + /* This should have been handled by kvm already. */ + fprintf(stderr, "Spurious tpi intercept\n"); + break; + case PRIV_SCHM: + no_cc = 1; + r = ioinst_handle_schm(env, env->regs[1], env->regs[2], + run->s390_sieic.ipb); + break; + case PRIV_RSCH: + r = ioinst_handle_rsch(env, env->regs[1]); + break; + case PRIV_RCHP: + r = ioinst_handle_rchp(env, env->regs[1]); + break; + case PRIV_STCPS: + /* We do not provide this instruction, it is suppressed. 
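+ With no_cc set and r = 0 below, store channel path status behaves as
+ a nop and the guest's condition code is left untouched.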
*/ + no_cc = 1; + r = 0; + break; + case PRIV_SAL: + no_cc = 1; + r = ioinst_handle_sal(env, env->regs[1]); + break; + default: + r = -1; + break; + } + + if (r >= 0) { + if (!no_cc) { + setcc(cpu, r); + } + r = 0; + } else if (r < -1) { + r = 0; + } + return r; +} + +static int is_ioinst(uint8_t ipa0, uint8_t ipa1, uint8_t ipb) +{ + int ret = 0; + uint16_t ipa = (ipa0 << 8) | ipa1; + + switch (ipa) { + case IPA0_B2 | PRIV_CSCH: + case IPA0_B2 | PRIV_HSCH: + case IPA0_B2 | PRIV_MSCH: + case IPA0_B2 | PRIV_SSCH: + case IPA0_B2 | PRIV_STSCH: + case IPA0_B2 | PRIV_TPI: + case IPA0_B2 | PRIV_SAL: + case IPA0_B2 | PRIV_RSCH: + case IPA0_B2 | PRIV_STCRW: + case IPA0_B2 | PRIV_STCPS: + case IPA0_B2 | PRIV_RCHP: + case IPA0_B2 | PRIV_SCHM: + case IPA0_B2 | PRIV_CHSC: + case IPA0_B2 | PRIV_SIGA: + case IPA0_B2 | PRIV_XSCH: + case IPA0_B9 | PRIV_EQBS: + case IPA0_EB | PRIV_SQBS: + ret = 1; + break; + } + + return ret; +} + +static int handle_priv(S390CPU *cpu, struct kvm_run *run, + uint8_t ipa0, uint8_t ipa1) { int r = 0; uint16_t ipbh0 = (run->s390_sieic.ipb & 0xffff0000) >> 16; + uint8_t ipb = run->s390_sieic.ipb & 0xff; dprintf("KVM: PRIV: %d\n", ipa1); switch (ipa1) { @@ -386,8 +524,16 @@ static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) r = kvm_sclp_service_call(cpu, run, ipbh0); break; default: - dprintf("KVM: unknown PRIV: 0x%x\n", ipa1); - r = -1; + if (is_ioinst(ipa0, ipa1, ipb)) { + r = kvm_handle_css_inst(cpu, run, ipa0, ipa1, ipb); + if (r == -1) { + setcc(cpu, 3); + r = 0; + } + } else { + dprintf("KVM: unknown PRIV: 0x%x\n", ipa1); + r = -1; + } break; } @@ -528,15 +674,17 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) dprintf("handle_instruction 0x%x 0x%x\n", run->s390_sieic.ipa, run->s390_sieic.ipb); switch (ipa0) { - case IPA0_PRIV: - r = handle_priv(cpu, run, ipa1); - break; - case IPA0_DIAG: - r = handle_diag(env, run, ipb_code); - break; - case IPA0_SIGP: - r = handle_sigp(cpu, run, ipa1); - break; + case IPA0_B2: + case IPA0_B9: + case IPA0_EB: + r = handle_priv(cpu, run, ipa0 >> 8, ipa1); + break; + case IPA0_DIAG: + r = handle_diag(env, run, ipb_code); + break; + case IPA0_SIGP: + r = handle_sigp(cpu, run, ipa1); + break; } if (r < 0) { @@ -595,6 +743,43 @@ static int handle_intercept(S390CPU *cpu) return r; } +static int handle_tsch(S390CPU *cpu) +{ + CPUS390XState *env = &cpu->env; + CPUState *cs = CPU(cpu); + struct kvm_run *run = cs->kvm_run; + int ret; + + cpu_synchronize_state(env); + ret = ioinst_handle_tsch(env, env->regs[1], run->s390_tsch.ipb); + if (ret >= 0) { + /* Success; set condition code. */ + setcc(cpu, ret); + ret = 0; + } else if (ret < -1) { + /* + * Failure. + * If an I/O interrupt had been dequeued, we have to reinject it. 
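+ * The type word below packs cssid/ssid from subchannel_id in the same
+ * layout kvm_s390_io_interrupt uses, so the reinjected interrupt
+ * matches the one that was dequeued.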
+ */ + if (run->s390_tsch.dequeued) { + uint16_t subchannel_id = run->s390_tsch.subchannel_id; + uint16_t subchannel_nr = run->s390_tsch.subchannel_nr; + uint32_t io_int_parm = run->s390_tsch.io_int_parm; + uint32_t io_int_word = run->s390_tsch.io_int_word; + uint32_t type = ((subchannel_id & 0xff00) << 24) | + ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16); + + kvm_s390_interrupt_internal(cpu, type, + ((uint32_t)subchannel_id << 16) + | subchannel_nr, + ((uint64_t)io_int_parm << 32) + | io_int_word, 1); + } + ret = 0; + } + return ret; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { S390CPU *cpu = S390_CPU(cs); @@ -607,6 +792,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_S390_RESET: qemu_system_reset_request(); break; + case KVM_EXIT_S390_TSCH: + ret = handle_tsch(cpu); + break; default: fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason); break; @@ -632,3 +820,33 @@ int kvm_arch_on_sigbus(int code, void *addr) { return 1; } + +void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id, + uint16_t subchannel_nr, uint32_t io_int_parm, + uint32_t io_int_word) +{ + uint32_t type; + + type = ((subchannel_id & 0xff00) << 24) | + ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16); + kvm_s390_interrupt_internal(cpu, type, + ((uint32_t)subchannel_id << 16) | subchannel_nr, + ((uint64_t)io_int_parm << 32) | io_int_word, 1); +} + +void kvm_s390_crw_mchk(S390CPU *cpu) +{ + kvm_s390_interrupt_internal(cpu, KVM_S390_MCHK, 1 << 28, + 0x00400f1d40330000, 1); +} + +void kvm_s390_enable_css_support(S390CPU *cpu) +{ + struct kvm_enable_cap cap = {}; + int r; + + /* Activate host kernel channel subsystem support. */ + cap.cap = KVM_CAP_S390_CSS_SUPPORT; + r = kvm_vcpu_ioctl(CPU(cpu), KVM_ENABLE_CAP, &cap); + assert(r == 0); +} diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c index 884c101..c120440 100644 --- a/target-unicore32/cpu.c +++ b/target-unicore32/cpu.c @@ -22,6 +22,22 @@ static inline void set_feature(CPUUniCore32State *env, int feature) /* CPU models */ +static ObjectClass *uc32_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU) || + object_class_is_abstract(oc))) { + oc = NULL; + } + return oc; +} + typedef struct UniCore32CPUInfo { const char *name; void (*instance_init)(Object *obj); @@ -80,6 +96,13 @@ static void uc32_cpu_initfn(Object *obj) tlb_flush(env, 1); } +static void uc32_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + + cc->class_by_name = uc32_cpu_class_by_name; +} + static void uc32_register_cpu_type(const UniCore32CPUInfo *info) { TypeInfo type_info = { @@ -88,7 +111,7 @@ static void uc32_register_cpu_type(const UniCore32CPUInfo *info) .instance_init = info->instance_init, }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo uc32_cpu_type_info = { @@ -98,6 +121,7 @@ static const TypeInfo uc32_cpu_type_info = { .instance_init = uc32_cpu_initfn, .abstract = true, .class_size = sizeof(UniCore32CPUClass), + .class_init = uc32_cpu_class_init, }; static void uc32_cpu_register_types(void) diff --git a/target-unicore32/helper.c b/target-unicore32/helper.c index 5359538..183b5b3 100644 --- a/target-unicore32/helper.c +++ b/target-unicore32/helper.c @@ -29,12 +29,14 @@ CPUUniCore32State *uc32_cpu_init(const char *cpu_model) { UniCore32CPU *cpu; 
CPUUniCore32State *env; + ObjectClass *oc; static int inited = 1; - if (object_class_by_name(cpu_model) == NULL) { + oc = cpu_class_by_name(TYPE_UNICORE32_CPU, cpu_model); + if (oc == NULL) { return NULL; } - cpu = UNICORE32_CPU(object_new(cpu_model)); + cpu = UNICORE32_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; if (inited) { diff --git a/tests/.gitignore b/tests/.gitignore index f9041f3..38c94ef 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -10,4 +10,5 @@ test-qmp-commands.h test-qmp-commands test-qmp-input-strict test-qmp-marshal.c +test-x86-cpuid *-test diff --git a/tests/Makefile b/tests/Makefile index 442b286..c681ceb 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -45,6 +45,11 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c check-unit-y += tests/test-thread-pool$(EXESUF) gcov-files-test-thread-pool-y = thread-pool.c +gcov-files-test-hbitmap-y = util/hbitmap.c +check-unit-y += tests/test-hbitmap$(EXESUF) +check-unit-y += tests/test-x86-cpuid$(EXESUF) +# all code tested by test-x86-cpuid is inside topology.h +gcov-files-test-x86-cpuid-y = check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -72,12 +77,15 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-coroutine.o tests/test-string-output-visitor.o \ tests/test-string-input-visitor.o tests/test-qmp-output-visitor.o \ tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ - tests/test-qmp-commands.o tests/test-visitor-serialization.o + tests/test-qmp-commands.o tests/test-visitor-serialization.o \ + tests/test-x86-cpuid.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o $(test-obj-y): QEMU_INCLUDES += -Itests +tests/test-x86-cpuid.o: QEMU_INCLUDES += -I$(SRC_PATH)/target-i386 + tests/check-qint$(EXESUF): tests/check-qint.o libqemuutil.a tests/check-qstring$(EXESUF): tests/check-qstring.o libqemuutil.a tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a @@ -88,6 +96,8 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a +tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a +tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index c6eb851..b040820 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') + def test_small_buffer(self): + self.assert_no_active_mirrors() + + # A small buffer is rounded up automatically + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=4096, target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_small_buffer2(self): + self.assert_no_active_mirrors() + + qemu_img('create', 
'-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d' + % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=65536, mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + def test_large_cluster(self): self.assert_no_active_mirrors() @@ -292,6 +323,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') + def test_large_cluster(self): + self.assert_no_active_mirrors() + + # qemu-img create fails if the image is not there + qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d' + %(TestMirrorNoBacking.image_len), target_backing_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' + % (TestMirrorNoBacking.image_len, target_backing_img), target_img) + os.remove(target_backing_img) + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + class TestReadErrors(ImageMirroringTestCase): image_len = 2 * 1024 * 1024 # MB @@ -330,6 +382,9 @@ new_state = "1" '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' % (self.blkdebug_file, backing_img), test_img) + # Write something for tests that use sync='top' + qemu_io('-c', 'write %d 512' % (self.MIRROR_GRANULARITY + 65536), + test_img) self.vm = iotests.VM().add_drive(test_img) self.vm.launch() @@ -383,6 +438,32 @@ new_state = "1" self.complete_and_wait() self.vm.shutdown() + def test_large_cluster(self): + self.assert_no_active_mirrors() + + # Test COW into the target image. The first half of the + # cluster at MIRROR_GRANULARITY has to be copied from + # backing_img, even though sync='top'. + qemu_img('create', '-f', iotests.imgfmt, '-ocluster_size=131072,backing_file=%s' %(backing_img), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='top', + on_source_error='ignore', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + # Detach blkdebug to compare images successfully + qemu_img('rebase', '-f', iotests.imgfmt, '-u', '-b', backing_img, test_img) + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + def test_stop_read(self): self.assert_no_active_mirrors() diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 71009c2..84bfd63 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -.................. +...................... 
---------------------------------------------------------------------- -Ran 18 tests +Ran 22 tests OK diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c new file mode 100644 index 0000000..8c902f2 --- /dev/null +++ b/tests/test-hbitmap.c @@ -0,0 +1,401 @@ +/* + * Hierarchical bitmap unit-tests. + * + * Copyright (C) 2012 Red Hat Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <glib.h> +#include <stdarg.h> +#include "qemu/hbitmap.h" + +#define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) + +#define L1 BITS_PER_LONG +#define L2 (BITS_PER_LONG * L1) +#define L3 (BITS_PER_LONG * L2) + +typedef struct TestHBitmapData { + HBitmap *hb; + unsigned long *bits; + size_t size; + int granularity; +} TestHBitmapData; + + +/* Check that the HBitmap and the shadow bitmap contain the same data, + * ignoring the same "first" bits. + */ +static void hbitmap_test_check(TestHBitmapData *data, + uint64_t first) +{ + uint64_t count = 0; + size_t pos; + int bit; + HBitmapIter hbi; + int64_t i, next; + + hbitmap_iter_init(&hbi, data->hb, first); + + i = first; + for (;;) { + next = hbitmap_iter_next(&hbi); + if (next < 0) { + next = data->size; + } + + while (i < next) { + pos = i >> LOG_BITS_PER_LONG; + bit = i & (BITS_PER_LONG - 1); + i++; + g_assert_cmpint(data->bits[pos] & (1UL << bit), ==, 0); + } + + if (next == data->size) { + break; + } + + pos = i >> LOG_BITS_PER_LONG; + bit = i & (BITS_PER_LONG - 1); + i++; + count++; + g_assert_cmpint(data->bits[pos] & (1UL << bit), !=, 0); + } + + if (first == 0) { + g_assert_cmpint(count << data->granularity, ==, hbitmap_count(data->hb)); + } +} + +/* This is provided instead of a test setup function so that the sizes + are kept in the test functions (and not in main()) */ +static void hbitmap_test_init(TestHBitmapData *data, + uint64_t size, int granularity) +{ + size_t n; + data->hb = hbitmap_alloc(size, granularity); + + n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG; + if (n == 0) { + n = 1; + } + data->bits = g_new0(unsigned long, n); + data->size = size; + data->granularity = granularity; + if (size) { + hbitmap_test_check(data, 0); + } +} + +static void hbitmap_test_teardown(TestHBitmapData *data, + const void *unused) +{ + if (data->hb) { + hbitmap_free(data->hb); + data->hb = NULL; + } + if (data->bits) { + g_free(data->bits); + data->bits = NULL; + } +} + +/* Set a range in the HBitmap and in the shadow "simple" bitmap. + * The two bitmaps are then tested against each other. + */ +static void hbitmap_test_set(TestHBitmapData *data, + uint64_t first, uint64_t count) +{ + hbitmap_set(data->hb, first, count); + while (count-- != 0) { + size_t pos = first >> LOG_BITS_PER_LONG; + int bit = first & (BITS_PER_LONG - 1); + first++; + + data->bits[pos] |= 1UL << bit; + } + + if (data->granularity == 0) { + hbitmap_test_check(data, 0); + } +} + +/* Reset a range in the HBitmap and in the shadow "simple" bitmap. 
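+ * As with hbitmap_test_set, the result is checked against the shadow
+ * bitmap, but only for granularity 0: a coarser hbitmap_reset clears
+ * whole granules, so the two representations may diverge.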
+ */ +static void hbitmap_test_reset(TestHBitmapData *data, + uint64_t first, uint64_t count) +{ + hbitmap_reset(data->hb, first, count); + while (count-- != 0) { + size_t pos = first >> LOG_BITS_PER_LONG; + int bit = first & (BITS_PER_LONG - 1); + first++; + + data->bits[pos] &= ~(1UL << bit); + } + + if (data->granularity == 0) { + hbitmap_test_check(data, 0); + } +} + +static void hbitmap_test_check_get(TestHBitmapData *data) +{ + uint64_t count = 0; + uint64_t i; + + for (i = 0; i < data->size; i++) { + size_t pos = i >> LOG_BITS_PER_LONG; + int bit = i & (BITS_PER_LONG - 1); + unsigned long val = data->bits[pos] & (1UL << bit); + count += hbitmap_get(data->hb, i); + g_assert_cmpint(hbitmap_get(data->hb, i), ==, val != 0); + } + g_assert_cmpint(count, ==, hbitmap_count(data->hb)); +} + +static void test_hbitmap_zero(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 0, 0); +} + +static void test_hbitmap_unaligned(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 + 23, 0); + hbitmap_test_set(data, 0, 1); + hbitmap_test_set(data, L3 + 22, 1); +} + +static void test_hbitmap_iter_empty(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L1, 0); +} + +static void test_hbitmap_iter_partial(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); + hbitmap_test_check(data, 1); + hbitmap_test_check(data, L1 - 1); + hbitmap_test_check(data, L1); + hbitmap_test_check(data, L1 * 2 - 1); + hbitmap_test_check(data, L2 - 1); + hbitmap_test_check(data, L2); + hbitmap_test_check(data, L2 + 1); + hbitmap_test_check(data, L2 + L1); + hbitmap_test_check(data, L2 + L1 * 2 - 1); + hbitmap_test_check(data, L2 * 2 - 1); + hbitmap_test_check(data, L2 * 2); + hbitmap_test_check(data, L2 * 2 + 1); + hbitmap_test_check(data, L2 * 2 + L1); + hbitmap_test_check(data, L2 * 2 + L1 * 2 - 1); + hbitmap_test_check(data, L3 / 2); +} + +static void test_hbitmap_set_all(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); +} + +static void test_hbitmap_get_all(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); + hbitmap_test_check_get(data); +} + +static void test_hbitmap_get_some(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, 10, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L1 - 1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L2 - 1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L2, 1); + hbitmap_test_check_get(data); +} + +static void test_hbitmap_set_one(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, 10, 1); + hbitmap_test_set(data, L1 - 1, 1); + hbitmap_test_set(data, L1, 1); + hbitmap_test_set(data, L2 - 1, 1); + hbitmap_test_set(data, L2, 1); +} + +static void test_hbitmap_set_two_elem(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, L1 - 1, 2); + hbitmap_test_set(data, L1 * 2 - 1, 4); + hbitmap_test_set(data, L1 * 4, L1 + 1); + hbitmap_test_set(data, L1 * 8 - 1, L1 + 1); + hbitmap_test_set(data, L2 - 1, 2); + hbitmap_test_set(data, L2 + L1 - 1, 8); + hbitmap_test_set(data, L2 + L1 * 4, L1 + 1); + hbitmap_test_set(data, L2 + L1 * 8 - 1, L1 + 1); +} + +static void 
test_hbitmap_set(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 3 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 5, L1 * 2 + 1); + hbitmap_test_set(data, L1 * 8 - 1, L1 * 2 + 1); + hbitmap_test_set(data, L2 - 1, L1 + 2); + hbitmap_test_set(data, L2 + L1 * 2 - 1, L1 + 2); + hbitmap_test_set(data, L2 + L1 * 4, L1 * 2 + 1); + hbitmap_test_set(data, L2 + L1 * 7 - 1, L1 * 2 + 1); + hbitmap_test_set(data, L2 * 2 - 1, L3 * 2 - L2 * 2); +} + +static void test_hbitmap_set_twice(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L1 * 3, 0); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_set(data, L1, 1); +} + +static void test_hbitmap_set_overlap(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 2 - 1, L1 * 2 + 2); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_set(data, L1 * 8 - 1, L2); + hbitmap_test_set(data, L2, L1); + hbitmap_test_set(data, L2 - L1 - 1, L1 * 8 + 2); + hbitmap_test_set(data, L2, L3 - L2 + 1); + hbitmap_test_set(data, L3 - L1, L1 * 3); + hbitmap_test_set(data, L3 - 1, 3); + hbitmap_test_set(data, L3 - 1, L2); +} + +static void test_hbitmap_reset_empty(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_reset(data, 0, L3); +} + +static void test_hbitmap_reset(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_reset(data, L1 * 2 - 1, L1 * 2 + 2); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_reset(data, L1 * 8 - 1, L2); + hbitmap_test_set(data, L2, L1); + hbitmap_test_reset(data, L2 - L1 - 1, L1 * 8 + 2); + hbitmap_test_set(data, L2, L3 - L2 + 1); + hbitmap_test_reset(data, L3 - L1, L1 * 3); + hbitmap_test_set(data, L3 - 1, 3); + hbitmap_test_reset(data, L3 - 1, L2); + hbitmap_test_set(data, 0, L3 * 2); + hbitmap_test_reset(data, 0, L1); + hbitmap_test_reset(data, 0, L2); + hbitmap_test_reset(data, L3, L3); + hbitmap_test_set(data, L3 / 2, L3); +} + +static void test_hbitmap_granularity(TestHBitmapData *data, + const void *unused) +{ + /* Note that hbitmap_test_check has to be invoked manually in this test. */ + hbitmap_test_init(data, L1, 1); + hbitmap_test_set(data, 0, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 2, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 0, 3); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); + hbitmap_test_reset(data, 0, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); +} + +static void test_hbitmap_iter_granularity(TestHBitmapData *data, + const void *unused) +{ + HBitmapIter hbi; + + /* Note that hbitmap_test_check has to be invoked manually in this test. 
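+ With granularity 7 one bit covers 1 << 7 = 128 items, so the iterator
+ is expected to return positions rounded down to a multiple of 128.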
*/ + hbitmap_test_init(data, 131072 << 7, 7); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_test_set(data, (131072 << 7) - 8, 8); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); +} + +static void hbitmap_test_add(const char *testpath, + void (*test_func)(TestHBitmapData *data, const void *user_data)) +{ + g_test_add(testpath, TestHBitmapData, NULL, NULL, test_func, + hbitmap_test_teardown); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero); + hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned); + hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty); + hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial); + hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity); + hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all); + hbitmap_test_add("/hbitmap/get/some", test_hbitmap_get_some); + hbitmap_test_add("/hbitmap/set/all", test_hbitmap_set_all); + hbitmap_test_add("/hbitmap/set/one", test_hbitmap_set_one); + hbitmap_test_add("/hbitmap/set/two-elem", test_hbitmap_set_two_elem); + hbitmap_test_add("/hbitmap/set/general", test_hbitmap_set); + hbitmap_test_add("/hbitmap/set/twice", test_hbitmap_set_twice); + hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap); + hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty); + hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset); + hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity); + g_test_run(); + + return 0; +} diff --git a/tests/test-x86-cpuid.c b/tests/test-x86-cpuid.c new file mode 100644 index 0000000..8d9f96a --- /dev/null +++ b/tests/test-x86-cpuid.c @@ -0,0 +1,110 @@ +/* + * Test code for x86 CPUID and Topology functions + * + * Copyright (c) 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <glib.h> + +#include "topology.h" + +static void test_topo_bits(void) +{ + /* simple tests for 1 thread per core, 1 core per socket */ + g_assert_cmpuint(apicid_smt_width(1, 1), ==, 0); + g_assert_cmpuint(apicid_core_width(1, 1), ==, 0); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 0), ==, 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 1), ==, 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 2), ==, 2); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 3), ==, 3); + + + /* Test field width calculation for multiple values + */ + g_assert_cmpuint(apicid_smt_width(1, 2), ==, 1); + g_assert_cmpuint(apicid_smt_width(1, 3), ==, 2); + g_assert_cmpuint(apicid_smt_width(1, 4), ==, 2); + + g_assert_cmpuint(apicid_smt_width(1, 14), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 15), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 16), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 17), ==, 5); + + + g_assert_cmpuint(apicid_core_width(30, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(31, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(32, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(33, 2), ==, 6); + + + /* build a weird topology and see if IDs are calculated correctly + */ + + /* This will use 2 bits for thread ID and 3 bits for core ID + */ + g_assert_cmpuint(apicid_smt_width(6, 3), ==, 2); + g_assert_cmpuint(apicid_core_width(6, 3), ==, 3); + g_assert_cmpuint(apicid_pkg_offset(6, 3), ==, 5); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 0), ==, 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1), ==, 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2), ==, 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 0), ==, + (1 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 1), ==, + (1 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 2), ==, + (1 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 0), ==, + (2 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 1), ==, + (2 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 2), ==, + (2 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 0), ==, + (5 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 1), ==, + (5 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 2), ==, + (5 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 0 * 3 + 0), ==, + (1 << 5)); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 1 * 3 + 1), ==, + (1 << 5) | (1 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 3 * 6 * 3 + 5 * 3 + 2), ==, + (3 << 5) | (5 << 2) | 2); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/cpuid/topology/basic", test_topo_bits); + + g_test_run(); + + return 0; +} diff --git a/trace-events b/trace-events index 09091e6..1011f27 100644 --- a/trace-events +++ b/trace-events @@ -79,10 +79,17 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) " # block/mirror.c mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_flush(void *s) "s %p" 
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" +mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64 +mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d" +mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" +mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d" +mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" +mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" # blockdev.c qmp_block_job_cancel(void *job) "job %p" @@ -1060,3 +1067,26 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" xics_ics_eoi(int nr) "ics_eoi: irq %#x" + +# hbitmap.c +hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" +hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 +hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 + +# target-s390x/ioinst.c +ioinst(const char *insn) "IOINST: %s" +ioinst_sch_id(const char *insn, int cssid, int ssid, int schid) "IOINST: %s (%x.%x.%04x)" +ioinst_chp_id(const char *insn, int cssid, int chpid) "IOINST: %s (%x.%02x)" +ioinst_chsc_cmd(uint16_t cmd, uint16_t len) "IOINST: chsc command %04x, len %04x" + +# hw/s390x/css.c +css_enable_facility(const char *facility) "CSS: enable %s" +css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char *chained) "CSS: queueing crw: rsc=%x, erc=%x, rsid=%x %s" +css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type) "CSS: add chpid %x.%02x (type %02x)" +css_new_image(uint8_t cssid, const char *default_cssid) "CSS: add css image %02x %s" +css_assign_subch(const char *do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno) "CSS: %s %x.%x.%04x (devno %04x)" +css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char *conditional) "CSS: I/O interrupt on sch %x.%x.%04x (intparm %08x, isc %x) %s" + +# hw/s390x/virtio-ccw.c +virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command %x" +virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno %04x (%s)" diff --git a/util/Makefile.objs b/util/Makefile.objs index 5baeb53..495a178 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -2,7 +2,7 @@ util-obj-y = osdep.o cutils.o qemu-timer-common.o util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o util-obj-y += envlist.o path.o host-utils.o cache-utils.o module.o -util-obj-y += bitmap.o bitops.o +util-obj-y += bitmap.o bitops.o hbitmap.o util-obj-y += acl.o util-obj-y += 
error.o qemu-error.o util-obj-$(CONFIG_POSIX) += compatfd.o diff --git a/util/hbitmap.c b/util/hbitmap.c new file mode 100644 index 0000000..2aa487d --- /dev/null +++ b/util/hbitmap.c @@ -0,0 +1,401 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include <string.h> +#include <glib.h> +#include <assert.h> +#include "qemu/osdep.h" +#include "qemu/hbitmap.h" +#include "qemu/host-utils.h" +#include "trace.h" + +/* An HBitmap provides an array of bits. The bits are stored as usual in an + * array of unsigned longs, but HBitmap is also optimized to provide fast + * iteration over set bits; going from one bit to the next is O(logB n) + * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough + * that the number of levels is in fact fixed. + * + * In order to do this, it stacks multiple bitmaps with progressively coarser + * granularity; in all levels except the last, bit N is set iff the N-th + * unsigned long is nonzero in the immediately next level. When iteration + * completes on the last level it can examine the 2nd-last level to quickly + * skip entire words, and even do so recursively to skip blocks of 64 words or + * powers thereof (32 on 32-bit machines). + * + * Given an index in the bitmap, it can be split into groups of bits like + * this (for the 64-bit case): + * + * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word + * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word + * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word + * + * So it is easy to move up simply by shifting the index right by + * log2(BITS_PER_LONG) bits. To move down, you shift the index left + * similarly, and add the word index within the group. Iteration uses + * ffs (find first set bit) to find the next word to examine; this + * operation can be done in constant time on most current architectures. + * + * When setting or clearing a range of m bits on all levels, the work to + * perform is O(m + m/W + m/W^2 + ...), which is O(m) as on a regular bitmap. + * + * When iterating on a bitmap, each bit (on any level) is only visited + * once. Hence, the total cost of visiting a bitmap with m bits in it is + * the number of bits that are set across all levels. Unless the bitmap is + * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized + * cost of advancing from one bit to the next is usually constant (worst case + * O(logB n) as in the non-amortized complexity). + */ + +struct HBitmap { + /* Number of total bits in the bottom level. */ + uint64_t size; + + /* Number of set bits in the bottom level. */ + uint64_t count; + + /* A scaling factor. Given a granularity of G, each bit in the bitmap + * actually represents a group of 2^G elements. Each operation on a + * range of bits first rounds the bits to determine which group they land + * in, and then affects the entire group; iteration will only visit the first + * bit of each group. 
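+ * (For example, a dirty bitmap that tracks 512-byte sectors with one bit + * per 64 KiB of disk would use G = 7, since 64 KiB covers 2^7 sectors.)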
Here is an example of operations in a size-16, + * granularity-1 HBitmap: + * + * initial state 00000000 + * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8) + * reset(start=1, count=3) 00111000 (iter: 4, 6, 8) + * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10) + * reset(start=5, count=5) 00000000 + * + * From an implementation point of view, when setting or resetting bits, + * the bitmap scales bit numbers right by this number of bits. When + * iterating, it scales bit numbers left by the same number of bits. + */ + int granularity; + + /* A number of progressively less coarse bitmaps (i.e. level 0 is the + * coarsest). Each bit in level N represents a word in level N+1 that + * has a set bit, except the last level where each bit represents the + * actual bitmap. + * + * Note that all bitmaps have the same number of levels. Even a 1-bit + * bitmap will still allocate HBITMAP_LEVELS arrays. + */ + unsigned long *levels[HBITMAP_LEVELS]; +}; + +static inline int popcountl(unsigned long l) +{ + return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l); +} + +/* Advance hbi to the next nonzero word and return it. hbi->pos + * is updated. Returns zero if we reach the end of the bitmap. + */ +unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) +{ + size_t pos = hbi->pos; + const HBitmap *hb = hbi->hb; + unsigned i = HBITMAP_LEVELS - 1; + + unsigned long cur; + do { + cur = hbi->cur[--i]; + pos >>= BITS_PER_LEVEL; + } while (cur == 0); + + /* Check for end of iteration. We always use fewer than BITS_PER_LONG + * bits in the level 0 bitmap; thus we can repurpose the most significant + * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures + * that the above loop ends even without an explicit check on i. + */ + + if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) { + return 0; + } + for (; i < HBITMAP_LEVELS - 1; i++) { + /* Shift back pos to the left, matching the right shifts above. + * The index of this word's least significant set bit provides + * the low-order bits. + */ + pos = (pos << BITS_PER_LEVEL) + ffsl(cur) - 1; + hbi->cur[i] = cur & (cur - 1); + + /* Set up next level for iteration. */ + cur = hb->levels[i + 1][pos]; + } + + hbi->pos = pos; + trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur); + + assert(cur); + return cur; +} + +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first) +{ + unsigned i, bit; + uint64_t pos; + + hbi->hb = hb; + pos = first >> hb->granularity; + assert(pos < hb->size); + hbi->pos = pos >> BITS_PER_LEVEL; + hbi->granularity = hb->granularity; + + for (i = HBITMAP_LEVELS; i-- > 0; ) { + bit = pos & (BITS_PER_LONG - 1); + pos >>= BITS_PER_LEVEL; + + /* Drop bits representing items before first. */ + hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1); + + /* We have already added level i+1, so the lowest set bit has + * been processed. Clear it. + */ + if (i != HBITMAP_LEVELS - 1) { + hbi->cur[i] &= ~(1UL << bit); + } + } +} + +bool hbitmap_empty(const HBitmap *hb) +{ + return hb->count == 0; +} + +int hbitmap_granularity(const HBitmap *hb) +{ + return hb->granularity; +} + +uint64_t hbitmap_count(const HBitmap *hb) +{ + return hb->count << hb->granularity; +} + +/* Count the number of set bits between start and last, not accounting for + * the granularity. Also an example of how to use hbitmap_iter_next_word. 
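+ * Note that start and last are already scaled down by the granularity; that + * is why the iterator is initialized with start shifted back up.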
+ */ +static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) +{ + HBitmapIter hbi; + uint64_t count = 0; + uint64_t end = last + 1; + unsigned long cur; + size_t pos; + + hbitmap_iter_init(&hbi, hb, start << hb->granularity); + for (;;) { + pos = hbitmap_iter_next_word(&hbi, &cur); + if (pos >= (end >> BITS_PER_LEVEL)) { + break; + } + count += popcountl(cur); + } + + if (pos == (end >> BITS_PER_LEVEL)) { + /* Drop bits representing the END-th and subsequent items. */ + int bit = end & (BITS_PER_LONG - 1); + cur &= (1UL << bit) - 1; + count += popcountl(cur); + } + + return count; +} + +/* Setting starts at the last layer and propagates up if an element + * changes from zero to non-zero. + */ +static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ + unsigned long mask; + bool changed; + + assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); + assert(start <= last); + + mask = 2UL << (last & (BITS_PER_LONG - 1)); + mask -= 1UL << (start & (BITS_PER_LONG - 1)); + changed = (*elem == 0); + *elem |= mask; + return changed; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ + size_t pos = start >> BITS_PER_LEVEL; + size_t lastpos = last >> BITS_PER_LEVEL; + bool changed = false; + size_t i; + + i = pos; + if (i < lastpos) { + uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + changed |= hb_set_elem(&hb->levels[level][i], start, next - 1); + for (;;) { + start = next; + next += BITS_PER_LONG; + if (++i == lastpos) { + break; + } + changed |= (hb->levels[level][i] == 0); + hb->levels[level][i] = ~0UL; + } + } + changed |= hb_set_elem(&hb->levels[level][i], start, last); + + /* If there was any change in this layer, we may have to update + * the one above. + */ + if (level > 0 && changed) { + hb_set_between(hb, level - 1, pos, lastpos); + } +} + +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) +{ + /* Compute range in the last layer. */ + uint64_t last = start + count - 1; + + trace_hbitmap_set(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + + start >>= hb->granularity; + last >>= hb->granularity; + count = last - start + 1; + + hb->count += count - hb_count_between(hb, start, last); + hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +/* Resetting works the other way round: propagate up if the new + * value is zero. + */ +static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ + unsigned long mask; + bool blanked; + + assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); + assert(start <= last); + + mask = 2UL << (last & (BITS_PER_LONG - 1)); + mask -= 1UL << (start & (BITS_PER_LONG - 1)); + blanked = *elem != 0 && ((*elem & ~mask) == 0); + *elem &= ~mask; + return blanked; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ + size_t pos = start >> BITS_PER_LEVEL; + size_t lastpos = last >> BITS_PER_LEVEL; + bool changed = false; + size_t i; + + i = pos; + if (i < lastpos) { + uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + + /* Here we need a more complex test than when setting bits. Even if + * something was changed, we must not blank bits in the upper level + * unless the lower-level word became entirely zero. So, remove pos + * from the upper-level range if bits remain set. 
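+ * (Incrementing pos here excludes this word from the range passed to the + * recursive call below, so parent bits are only cleared for words that + * actually became zero.)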
+ */ + if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) { + changed = true; + } else { + pos++; + } + + for (;;) { + start = next; + next += BITS_PER_LONG; + if (++i == lastpos) { + break; + } + changed |= (hb->levels[level][i] != 0); + hb->levels[level][i] = 0UL; + } + } + + /* Same as above, this time for lastpos. */ + if (hb_reset_elem(&hb->levels[level][i], start, last)) { + changed = true; + } else { + lastpos--; + } + + if (level > 0 && changed) { + hb_reset_between(hb, level - 1, pos, lastpos); + } +} + +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) +{ + /* Compute range in the last layer. */ + uint64_t last = start + count - 1; + + trace_hbitmap_reset(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + + start >>= hb->granularity; + last >>= hb->granularity; + + hb->count -= hb_count_between(hb, start, last); + hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +bool hbitmap_get(const HBitmap *hb, uint64_t item) +{ + /* Compute position and bit in the last layer. */ + uint64_t pos = item >> hb->granularity; + unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1)); + + return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; +} + +void hbitmap_free(HBitmap *hb) +{ + unsigned i; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + g_free(hb->levels[i]); + } + g_free(hb); +} + +HBitmap *hbitmap_alloc(uint64_t size, int granularity) +{ + HBitmap *hb = g_malloc0(sizeof (struct HBitmap)); + unsigned i; + + assert(granularity >= 0 && granularity < 64); + size = (size + (1ULL << granularity) - 1) >> granularity; + assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); + + hb->size = size; + hb->granularity = granularity; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + hb->levels[i] = g_malloc0(size * sizeof(unsigned long)); + } + + /* We necessarily have free bits in level 0 due to the definition + * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up + * hbitmap_iter_skip_words. 
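+ * (The allocation loop above divides size by BITS_PER_LONG, rounding up, at + * each level, so by the time level 0 is reached a single word remains; the + * assertion below checks exactly that.)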
*/ + assert(size == 1); + hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); + return hb; +} diff --git a/vl.c b/vl.c --- a/vl.c +++ b/vl.c @@ -176,6 +176,7 @@ int main(int argc, char **argv) #define DEFAULT_RAM_SIZE 128 #define MAX_VIRTIO_CONSOLES 1 +#define MAX_SCLP_CONSOLES 1 static const char *data_dir; const char *bios_name = NULL; @@ -203,6 +204,7 @@ int no_quit = 0; CharDriverState *serial_hds[MAX_SERIAL_PORTS]; CharDriverState *parallel_hds[MAX_PARALLEL_PORTS]; CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES]; +CharDriverState *sclp_hds[MAX_SCLP_CONSOLES]; int win2k_install_hack = 0; int singlestep = 0; int smp_cpus = 1; @@ -271,6 +273,7 @@ static int tcg_tb_size; static int default_serial = 1; static int default_parallel = 1; static int default_virtcon = 1; +static int default_sclp = 1; static int default_monitor = 1; static int default_floppy = 1; static int default_cdrom = 1; @@ -2340,6 +2343,7 @@ struct device_config { DEV_VIRTCON, /* -virtioconsole */ DEV_DEBUGCON, /* -debugcon */ DEV_GDB, /* -gdb, -s */ + DEV_SCLP, /* s390 sclp */ } type; const char *cmdline; Location loc; @@ -2458,6 +2462,39 @@ static int virtcon_parse(const char *devname) return 0; } +static int sclp_parse(const char *devname) +{ + QemuOptsList *device = qemu_find_opts("device"); + static int index = 0; + char label[32]; + QemuOpts *dev_opts; + + if (strcmp(devname, "none") == 0) { + return 0; + } + if (index == MAX_SCLP_CONSOLES) { + fprintf(stderr, "qemu: too many sclp consoles\n"); + exit(1); + } + + assert(arch_type == QEMU_ARCH_S390X); + + dev_opts = qemu_opts_create(device, NULL, 0, NULL); + qemu_opt_set(dev_opts, "driver", "sclpconsole"); + + snprintf(label, sizeof(label), "sclpcon%d", index); + sclp_hds[index] = qemu_chr_new(label, devname, NULL); + if (!sclp_hds[index]) { + fprintf(stderr, "qemu: could not connect sclp console" + " to character backend '%s'\n", devname); + return -1; + } + qemu_opt_set(dev_opts, "chardev", label); + + index++; + return 0; +} + static int debugcon_parse(const char *devname) { QemuOpts *opts; @@ -3615,6 +3652,7 @@ int main(int argc, char **argv, char **envp) default_serial = 0; default_parallel = 0; default_virtcon = 0; + default_sclp = 0; default_monitor = 0; default_net = 0; default_floppy = 0; @@ -3832,6 +3870,9 @@ int main(int argc, char **argv, char **envp) if (!machine->use_virtcon) { default_virtcon = 0; } + if (!machine->use_sclp) { + default_sclp = 0; + } if (machine->no_floppy) { default_floppy = 0; } @@ -3873,11 +3914,16 @@ int main(int argc, char **argv, char **envp) add_device_config(DEV_SERIAL, "mon:stdio"); } else if (default_virtcon && default_monitor) { add_device_config(DEV_VIRTCON, "mon:stdio"); + } else if (default_sclp && default_monitor) { + add_device_config(DEV_SCLP, "mon:stdio"); } else { if (default_serial) add_device_config(DEV_SERIAL, "stdio"); if (default_virtcon) add_device_config(DEV_VIRTCON, "stdio"); + if (default_sclp) { + add_device_config(DEV_SCLP, "stdio"); + } if (default_monitor) monitor_parse("stdio", "readline"); } @@ -3890,6 +3936,9 @@ int main(int argc, char **argv, char **envp) monitor_parse("vc:80Cx24C", "readline"); if (default_virtcon) add_device_config(DEV_VIRTCON, "vc:80Cx24C"); + if (default_sclp) { + add_device_config(DEV_SCLP, "vc:80Cx24C"); + } } socket_init(); @@ -4060,6 +4109,9 @@ int main(int argc, char **argv, char **envp) exit(1); if (foreach_device_config(DEV_VIRTCON, virtcon_parse) < 0) exit(1); + if (foreach_device_config(DEV_SCLP, sclp_parse) < 0) { + exit(1); + } if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) exit(1);
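As a closing illustration (not part of the series itself), here is a minimal, hypothetical user of the HBitmap API introduced above; it assumes only the functions defined in util/hbitmap.c and the iterator declarations in "qemu/hbitmap.h", plus <inttypes.h> for the print format:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include "qemu/hbitmap.h"

int main(void)
{
    HBitmap *hb = hbitmap_alloc(1 << 20, 3);  /* 1M items, 8 items per bit */
    HBitmapIter hbi;
    int64_t cur;

    hbitmap_set(hb, 100, 64);                 /* items 100..163 -> groups 12..20 */
    assert(hbitmap_get(hb, 100));
    assert(hbitmap_count(hb) == 9 << 3);      /* 9 dirty groups, rescaled to items */

    /* The iterator visits the first item of each dirty group: 96, 104, ..., 160. */
    hbitmap_iter_init(&hbi, hb, 0);
    while ((cur = hbitmap_iter_next(&hbi)) >= 0) {
        printf("dirty group starts at item %" PRId64 "\n", cur);
    }

    hbitmap_free(hb);
    return 0;
}

The rounding visible here (setting items 100..163 reports 96..167 as dirty) is the same behavior that the /hbitmap/granularity test exercises in the new test suite.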