98 files changed, 7577 insertions, 724 deletions
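The block-layer hunks below replace the fixed 1 MiB dirty-chunk bitmap with an HBitmap whose per-bit granularity is chosen by the caller of bdrv_set_dirty_tracking(). As a rough illustration of the arithmetic those hunks rely on, here is a minimal sketch; it assumes 512-byte sectors (BDRV_SECTOR_BITS == 9, as in QEMU's block.h), and the helper name is hypothetical, not part of the patch:

#include <assert.h>
#include <strings.h>   /* ffs() */

#define BDRV_SECTOR_BITS 9   /* 512-byte sectors */

/* Map a byte granularity to the hbitmap granularity level passed to
 * hbitmap_alloc(), mirroring "granularity >>= BDRV_SECTOR_BITS;
 * ... ffs(granularity) - 1" in the patch below. */
static int granularity_to_hbitmap_level(int granularity)
{
    assert(granularity >= 512);
    assert((granularity & (granularity - 1)) == 0);   /* power of two */
    return ffs(granularity >> BDRV_SECTOR_BITS) - 1;
}

For example, a 512-byte granularity yields level 0 (one bit per sector), and the 64 KiB upper bound used by mirroring yields level 7 (one bit per 128 sectors).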
diff --git a/block-migration.c b/block-migration.c index 6acf3e1..9ac7de6 100644 --- a/block-migration.c +++ b/block-migration.c @@ -23,7 +23,8 @@ #include "sysemu/blockdev.h" #include <assert.h> -#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS) +#define BLOCK_SIZE (1 << 20) +#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS) #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_EOS 0x02 @@ -254,7 +255,7 @@ static void set_dirty_tracking(int enable) BlkMigDevState *bmds; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - bdrv_set_dirty_tracking(bmds->bs, enable); + bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0); } } @@ -478,7 +479,7 @@ static int64_t get_remaining_dirty(void) dirty += bdrv_get_dirty_count(bmds->bs); } - return dirty * BLOCK_SIZE; + return dirty << BDRV_SECTOR_BITS; } static void blk_mig_cleanup(void) @@ -1286,7 +1286,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->iostatus = bs_src->iostatus; /* dirty bitmap */ - bs_dest->dirty_count = bs_src->dirty_count; bs_dest->dirty_bitmap = bs_src->dirty_bitmap; /* job */ @@ -1674,10 +1673,10 @@ static void tracked_request_begin(BdrvTrackedRequest *req, /** * Round a region to cluster boundaries */ -static void round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors) { BlockDriverInfo bdi; @@ -1719,8 +1718,8 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs, * CoR read and write operations are atomic and guest writes cannot * interleave between them. */ - round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, sector_num, nb_sectors, + &cluster_sector_num, &cluster_nb_sectors); do { retry = false; @@ -2035,36 +2034,6 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, return ret; } -#define BITS_PER_LONG (sizeof(unsigned long) * 8) - -static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int dirty) -{ - int64_t start, end; - unsigned long val, idx, bit; - - start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; - end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK; - - for (; start <= end; start++) { - idx = start / BITS_PER_LONG; - bit = start % BITS_PER_LONG; - val = bs->dirty_bitmap[idx]; - if (dirty) { - if (!(val & (1UL << bit))) { - bs->dirty_count++; - val |= 1UL << bit; - } - } else { - if (val & (1UL << bit)) { - bs->dirty_count--; - val &= ~(1UL << bit); - } - } - bs->dirty_bitmap[idx] = val; - } -} - /* Return < 0 if error. Important errors are: -EIO generic I/O error (may happen for all errors) -ENOMEDIUM No media inserted. @@ -2216,8 +2185,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. 
*/ - round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, sector_num, nb_sectors, + &cluster_sector_num, &cluster_nb_sectors); trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, cluster_sector_num, cluster_nb_sectors); @@ -2863,8 +2832,9 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs) if (bs->dirty_bitmap) { info->has_dirty = true; info->dirty = g_malloc0(sizeof(*info->dirty)); - info->dirty->count = bdrv_get_dirty_count(bs) * - BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE; + info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE; + info->dirty->granularity = + ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap)); } if (bs->drv) { @@ -4173,7 +4143,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 0); + bdrv_reset_dirty(bs, sector_num, nb_sectors); } if (bs->drv->bdrv_co_discard) { @@ -4331,22 +4301,20 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) +void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity) { int64_t bitmap_size; - bs->dirty_count = 0; - if (enable) { - if (!bs->dirty_bitmap) { - bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + - BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1; - bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; + assert((granularity & (granularity - 1)) == 0); - bs->dirty_bitmap = g_new0(unsigned long, bitmap_size); - } + if (granularity) { + granularity >>= BDRV_SECTOR_BITS; + assert(!bs->dirty_bitmap); + bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); } else { if (bs->dirty_bitmap) { - g_free(bs->dirty_bitmap); + hbitmap_free(bs->dirty_bitmap); bs->dirty_bitmap = NULL; } } @@ -4354,67 +4322,37 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) { - int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; - - if (bs->dirty_bitmap && - (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { - return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] & - (1UL << (chunk % BITS_PER_LONG))); + if (bs->dirty_bitmap) { + return hbitmap_get(bs->dirty_bitmap, sector); } else { return 0; } } -int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector) +void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi) { - int64_t chunk; - int bit, elem; - - /* Avoid an infinite loop. 
*/ - assert(bs->dirty_count > 0); - - sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; - chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; - - QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG); - elem = chunk / BITS_PER_LONG; - bit = chunk % BITS_PER_LONG; - for (;;) { - if (sector >= bs->total_sectors) { - sector = 0; - bit = elem = 0; - } - if (bit == 0 && bs->dirty_bitmap[elem] == 0) { - sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; - elem++; - } else { - if (bs->dirty_bitmap[elem] & (1UL << bit)) { - return sector; - } - sector += BDRV_SECTORS_PER_DIRTY_CHUNK; - if (++bit == BITS_PER_LONG) { - bit = 0; - elem++; - } - } - } + hbitmap_iter_init(hbi, bs->dirty_bitmap, 0); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - set_dirty_bitmap(bs, cur_sector, nr_sectors, 1); + hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors); } void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); + hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors); } int64_t bdrv_get_dirty_count(BlockDriverState *bs) { - return bs->dirty_count; + if (bs->dirty_bitmap) { + return hbitmap_count(bs->dirty_bitmap); + } else { + return 0; + } } void bdrv_set_in_use(BlockDriverState *bs, int in_use) diff --git a/block/bochs.c b/block/bochs.c index 1b1d9cd..3737583 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -126,7 +126,7 @@ static int bochs_open(BlockDriverState *bs, int flags) strcmp(bochs.subtype, GROWING_TYPE) || ((le32_to_cpu(bochs.version) != HEADER_VERSION) && (le32_to_cpu(bochs.version) != HEADER_V1))) { - goto fail; + return -EMEDIUMTYPE; } if (le32_to_cpu(bochs.version) == HEADER_V1) { diff --git a/block/cow.c b/block/cow.c index a33ce95..4baf904 100644 --- a/block/cow.c +++ b/block/cow.c @@ -73,7 +73,7 @@ static int cow_open(BlockDriverState *bs, int flags) } if (be32_to_cpu(cow_header.magic) != COW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } diff --git a/block/mirror.c b/block/mirror.c index 6180aa3..a62ad86 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -15,17 +15,17 @@ #include "block/blockjob.h" #include "block/block_int.h" #include "qemu/ratelimit.h" +#include "qemu/bitmap.h" -enum { - /* - * Size of data buffer for populating the image file. This should be large - * enough to process multiple clusters in a single call, so that populating - * contiguous regions of the image is efficient. - */ - BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ -}; +#define SLICE_TIME 100000000ULL /* ns */ +#define MAX_IN_FLIGHT 16 -#define SLICE_TIME 100000000ULL /* ns */ +/* The mirroring buffer is a list of granularity-sized chunks. + * Free chunks are organized in a list. 
+ */ +typedef struct MirrorBuffer { + QSIMPLEQ_ENTRY(MirrorBuffer) next; +} MirrorBuffer; typedef struct MirrorBlockJob { BlockJob common; @@ -36,9 +36,26 @@ typedef struct MirrorBlockJob { bool synced; bool should_complete; int64_t sector_num; + int64_t granularity; + size_t buf_size; + unsigned long *cow_bitmap; + HBitmapIter hbi; uint8_t *buf; + QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; + int buf_free_count; + + unsigned long *in_flight_bitmap; + int in_flight; + int ret; } MirrorBlockJob; +typedef struct MirrorOp { + MirrorBlockJob *s; + QEMUIOVector qiov; + int64_t sector_num; + int nb_sectors; +} MirrorOp; + static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, int error) { @@ -52,51 +69,234 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, } } -static int coroutine_fn mirror_iteration(MirrorBlockJob *s, - BlockErrorAction *p_action) +static void mirror_iteration_done(MirrorOp *op, int ret) { - BlockDriverState *source = s->common.bs; - BlockDriverState *target = s->target; - QEMUIOVector qiov; - int ret, nb_sectors; - int64_t end; - struct iovec iov; + MirrorBlockJob *s = op->s; + struct iovec *iov; + int64_t chunk_num; + int i, nb_chunks, sectors_per_chunk; + + trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); + + s->in_flight--; + iov = op->qiov.iov; + for (i = 0; i < op->qiov.niov; i++) { + MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next); + s->buf_free_count++; + } - end = s->common.len >> BDRV_SECTOR_BITS; - s->sector_num = bdrv_get_next_dirty(source, s->sector_num); - nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); - bdrv_reset_dirty(source, s->sector_num, nb_sectors); + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + chunk_num = op->sector_num / sectors_per_chunk; + nb_chunks = op->nb_sectors / sectors_per_chunk; + bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); + if (s->cow_bitmap && ret >= 0) { + bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); + } - /* Copy the dirty cluster. 
*/ - iov.iov_base = s->buf; - iov.iov_len = nb_sectors * 512; - qemu_iovec_init_external(&qiov, &iov, 1); + g_slice_free(MirrorOp, op); + qemu_coroutine_enter(s->common.co, NULL); +} - trace_mirror_one_iteration(s, s->sector_num, nb_sectors); - ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); +static void mirror_write_complete(void *opaque, int ret) +{ + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; if (ret < 0) { - *p_action = mirror_error_action(s, true, -ret); - goto fail; + BlockDriverState *source = s->common.bs; + BlockErrorAction action; + + bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + action = mirror_error_action(s, false, -ret); + if (action == BDRV_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } } - ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + mirror_iteration_done(op, ret); +} + +static void mirror_read_complete(void *opaque, int ret) +{ + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; if (ret < 0) { - *p_action = mirror_error_action(s, false, -ret); - s->synced = false; - goto fail; + BlockDriverState *source = s->common.bs; + BlockErrorAction action; + + bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + action = mirror_error_action(s, true, -ret); + if (action == BDRV_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } + + mirror_iteration_done(op, ret); + return; + } + bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors, + mirror_write_complete, op); +} + +static void coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->common.bs; + int nb_sectors, sectors_per_chunk, nb_chunks; + int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; + MirrorOp *op; + + s->sector_num = hbitmap_iter_next(&s->hbi); + if (s->sector_num < 0) { + bdrv_dirty_iter_init(source, &s->hbi); + s->sector_num = hbitmap_iter_next(&s->hbi); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(source)); + assert(s->sector_num >= 0); + } + + hbitmap_next_sector = s->sector_num; + sector_num = s->sector_num; + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + end = s->common.len >> BDRV_SECTOR_BITS; + + /* Extend the QEMUIOVector to include all adjacent blocks that will + * be copied in this operation. + * + * We have to do this if we have no backing file yet in the destination, + * and the cluster size is very large. Then we need to do COW ourselves. + * The first time a cluster is copied, copy it entirely. Note that, + * because both the granularity and the cluster size are powers of two, + * the number of sectors to copy cannot exceed one cluster. + * + * We also want to extend the QEMUIOVector to include more adjacent + * dirty blocks if possible, to limit the number of I/O operations and + * run efficiently even with a small granularity. + */ + nb_chunks = 0; + nb_sectors = 0; + next_sector = sector_num; + next_chunk = sector_num / sectors_per_chunk; + + /* Wait for I/O to this cluster (from a previous iteration) to be done. 
*/ + while (test_bit(next_chunk, s->in_flight_bitmap)) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + qemu_coroutine_yield(); + } + + do { + int added_sectors, added_chunks; + + if (!bdrv_get_dirty(source, next_sector) || + test_bit(next_chunk, s->in_flight_bitmap)) { + assert(nb_sectors > 0); + break; + } + + added_sectors = sectors_per_chunk; + if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) { + bdrv_round_to_clusters(s->target, + next_sector, added_sectors, + &next_sector, &added_sectors); + + /* On the first iteration, the rounding may make us copy + * sectors before the first dirty one. + */ + if (next_sector < sector_num) { + assert(nb_sectors == 0); + sector_num = next_sector; + next_chunk = next_sector / sectors_per_chunk; + } + } + + added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors)); + added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk; + + /* When doing COW, it may happen that there is not enough space for + * a full cluster. Wait if that is the case. + */ + while (nb_chunks == 0 && s->buf_free_count < added_chunks) { + trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight); + qemu_coroutine_yield(); + } + if (s->buf_free_count < nb_chunks + added_chunks) { + trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); + break; + } + + /* We have enough free space to copy these sectors. */ + bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks); + + nb_sectors += added_sectors; + nb_chunks += added_chunks; + next_sector += added_sectors; + next_chunk += added_chunks; + } while (next_sector < end); + + /* Allocate a MirrorOp that is used as an AIO callback. */ + op = g_slice_new(MirrorOp); + op->s = s; + op->sector_num = sector_num; + op->nb_sectors = nb_sectors; + + /* Now make a QEMUIOVector taking enough granularity-sized chunks + * from s->buf_free. + */ + qemu_iovec_init(&op->qiov, nb_chunks); + next_sector = sector_num; + while (nb_chunks-- > 0) { + MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); + QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); + s->buf_free_count--; + qemu_iovec_add(&op->qiov, buf, s->granularity); + + /* Advance the HBitmapIter in parallel, so that we do not examine + * the same sector twice. + */ + if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) { + hbitmap_next_sector = hbitmap_iter_next(&s->hbi); + } + + next_sector += sectors_per_chunk; } - return 0; -fail: - /* Try again later. */ - bdrv_set_dirty(source, s->sector_num, nb_sectors); - return ret; + bdrv_reset_dirty(source, sector_num, nb_sectors); + + /* Copy the dirty cluster. 
*/ + s->in_flight++; + trace_mirror_one_iteration(s, sector_num, nb_sectors); + bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, + mirror_read_complete, op); +} + +static void mirror_free_init(MirrorBlockJob *s) +{ + int granularity = s->granularity; + size_t buf_size = s->buf_size; + uint8_t *buf = s->buf; + + assert(s->buf_free_count == 0); + QSIMPLEQ_INIT(&s->buf_free); + while (buf_size != 0) { + MirrorBuffer *cur = (MirrorBuffer *)buf; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next); + s->buf_free_count++; + buf_size -= granularity; + buf += granularity; + } +} + +static void mirror_drain(MirrorBlockJob *s) +{ + while (s->in_flight > 0) { + qemu_coroutine_yield(); + } } static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; - int64_t sector_num, end; + int64_t sector_num, end, sectors_per_chunk, length; + uint64_t last_pause_ns; + BlockDriverInfo bdi; + char backing_filename[1024]; int ret = 0; int n; @@ -105,20 +305,39 @@ static void coroutine_fn mirror_run(void *opaque) } s->common.len = bdrv_getlength(bs); - if (s->common.len < 0) { + if (s->common.len <= 0) { block_job_completed(&s->common, s->common.len); return; } + length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; + s->in_flight_bitmap = bitmap_new(length); + + /* If we have no backing file yet in the destination, we cannot let + * the destination do COW. Instead, we copy sectors around the + * dirty data if needed. We need a bitmap to do that. + */ + bdrv_get_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + if (backing_filename[0] && !s->target->backing_hd) { + bdrv_get_info(s->target, &bdi); + if (s->granularity < bdi.cluster_size) { + s->buf_size = MAX(s->buf_size, bdi.cluster_size); + s->cow_bitmap = bitmap_new(length); + } + } + end = s->common.len >> BDRV_SECTOR_BITS; - s->buf = qemu_blockalign(bs, BLOCK_SIZE); + s->buf = qemu_blockalign(bs, s->buf_size); + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + mirror_free_init(s); if (s->mode != MIRROR_SYNC_MODE_NONE) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base; base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; for (sector_num = 0; sector_num < end; ) { - int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; ret = bdrv_co_is_allocated_above(bs, base, sector_num, next - sector_num, &n); @@ -136,24 +355,40 @@ static void coroutine_fn mirror_run(void *opaque) } } - s->sector_num = -1; + bdrv_dirty_iter_init(bs, &s->hbi); + last_pause_ns = qemu_get_clock_ns(rt_clock); for (;;) { uint64_t delay_ns; int64_t cnt; bool should_complete; + if (s->ret < 0) { + ret = s->ret; + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); - if (cnt != 0) { - BlockErrorAction action = BDRV_ACTION_REPORT; - ret = mirror_iteration(s, &action); - if (ret < 0 && action == BDRV_ACTION_REPORT) { - goto immediate_exit; + + /* Note that even when no rate limit is applied we need to yield + * periodically with no pending I/O so that qemu_aio_flush() returns. + * We do so every SLICE_TIME nanoseconds, or when there is an error, + * or when the source is clean, whichever comes first. 
+ */ + if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME && + s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); + qemu_coroutine_yield(); + continue; + } else if (cnt != 0) { + mirror_iteration(s); + continue; } - cnt = bdrv_get_dirty_count(bs); } should_complete = false; - if (cnt == 0) { + if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); ret = bdrv_flush(s->target); if (ret < 0) { @@ -196,23 +431,20 @@ static void coroutine_fn mirror_run(void *opaque) trace_mirror_before_sleep(s, cnt, s->synced); if (!s->synced) { /* Publish progress */ - s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk); } else { delay_ns = 0; } - /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that bdrv_drain_all() returns. - */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } } else if (!should_complete) { - delay_ns = (cnt == 0 ? SLICE_TIME : 0); + delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0); block_job_sleep_ns(&s->common, rt_clock, delay_ns); } else if (cnt == 0) { /* The two disks are in sync. Exit and report successful @@ -222,11 +454,24 @@ static void coroutine_fn mirror_run(void *opaque) s->common.cancelled = false; break; } + last_pause_ns = qemu_get_clock_ns(rt_clock); } immediate_exit: + if (s->in_flight > 0) { + /* We get here only if something went wrong. Either the job failed, + * or it was cancelled prematurely so that we do not guarantee that + * the target is a copy of the source. + */ + assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common))); + mirror_drain(s); + } + + assert(s->in_flight == 0); qemu_vfree(s->buf); - bdrv_set_dirty_tracking(bs, false); + g_free(s->cow_bitmap); + g_free(s->in_flight_bitmap); + bdrv_set_dirty_tracking(bs, 0); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { @@ -288,14 +533,28 @@ static BlockJobType mirror_job_type = { }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { MirrorBlockJob *s; + if (granularity == 0) { + /* Choose the default granularity based on the target file's cluster + * size, clamped between 4k and 64k. 
*/ + BlockDriverInfo bdi; + if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + } + + assert ((granularity & (granularity - 1)) == 0); + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { @@ -312,7 +571,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->on_target_error = on_target_error; s->target = target; s->mode = mode; - bdrv_set_dirty_tracking(bs, true); + s->granularity = granularity; + s->buf_size = MAX(buf_size, granularity); + + bdrv_set_dirty_tracking(bs, granularity); bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/block/qcow.c b/block/qcow.c index 4276610..a7135ee 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -112,7 +112,7 @@ static int qcow_open(BlockDriverState *bs, int flags) be64_to_cpus(&header.l1_table_offset); if (header.magic != QCOW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } if (header.version != QCOW_VERSION) { diff --git a/block/qcow2.c b/block/qcow2.c index f6abff6..7610e56 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -311,7 +311,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) be32_to_cpus(&header.nb_snapshots); if (header.magic != QCOW_MAGIC) { - ret = -EINVAL; + ret = -EMEDIUMTYPE; goto fail; } if (header.version < 2 || header.version > 3) { diff --git a/block/qed.c b/block/qed.c index cf85d8f..b8515e5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -390,7 +390,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) qed_header_le_to_cpu(&le_header, &s->header); if (s->header.magic != QED_MAGIC) { - return -EINVAL; + return -EMEDIUMTYPE; } if (s->header.features & ~QED_FEATURE_MASK) { /* image uses unsupported feature bits */ diff --git a/block/vdi.c b/block/vdi.c index 021abaa..257a592 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -246,7 +246,7 @@ static void vdi_header_print(VdiHeader *header) { char uuid[37]; logout("text %s", header->text); - logout("signature 0x%04x\n", header->signature); + logout("signature 0x%08x\n", header->signature); logout("header size 0x%04x\n", header->header_size); logout("image type 0x%04x\n", header->image_type); logout("image flags 0x%04x\n", header->image_flags); @@ -369,10 +369,12 @@ static int vdi_open(BlockDriverState *bs, int flags) BDRVVdiState *s = bs->opaque; VdiHeader header; size_t bmap_size; + int ret; logout("\n"); - if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) { + ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + if (ret < 0) { goto fail; } @@ -390,33 +392,45 @@ static int vdi_open(BlockDriverState *bs, int flags) header.disk_size &= ~(SECTOR_SIZE - 1); } - if (header.version != VDI_VERSION_1_1) { + if (header.signature != VDI_SIGNATURE) { + logout("bad vdi signature %08x\n", header.signature); + ret = -EMEDIUMTYPE; + goto fail; + } else if (header.version != VDI_VERSION_1_1) { logout("unsupported version %u.%u\n", header.version >> 16, header.version & 0xffff); + ret = -ENOTSUP; goto fail; } else if (header.offset_bmap % SECTOR_SIZE != 0) { /* We only support block maps which start on a sector boundary. */ logout("unsupported block map offset 0x%x B\n", header.offset_bmap); + ret = -ENOTSUP; goto fail; } else if (header.offset_data % SECTOR_SIZE != 0) { /* We only support data blocks which start on a sector boundary. 
*/ logout("unsupported data offset 0x%x B\n", header.offset_data); + ret = -ENOTSUP; goto fail; } else if (header.sector_size != SECTOR_SIZE) { logout("unsupported sector size %u B\n", header.sector_size); + ret = -ENOTSUP; goto fail; } else if (header.block_size != 1 * MiB) { logout("unsupported block size %u B\n", header.block_size); + ret = -ENOTSUP; goto fail; } else if (header.disk_size > (uint64_t)header.blocks_in_image * header.block_size) { logout("unsupported disk size %" PRIu64 " B\n", header.disk_size); + ret = -ENOTSUP; goto fail; } else if (!uuid_is_null(header.uuid_link)) { logout("link uuid != 0, unsupported\n"); + ret = -ENOTSUP; goto fail; } else if (!uuid_is_null(header.uuid_parent)) { logout("parent uuid != 0, unsupported\n"); + ret = -ENOTSUP; goto fail; } @@ -432,7 +446,8 @@ static int vdi_open(BlockDriverState *bs, int flags) if (bmap_size > 0) { s->bmap = g_malloc(bmap_size * SECTOR_SIZE); } - if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) { + ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size); + if (ret < 0) { goto fail_free_bmap; } @@ -448,7 +463,7 @@ static int vdi_open(BlockDriverState *bs, int flags) g_free(s->bmap); fail: - return -1; + return ret; } static int vdi_reopen_prepare(BDRVReopenState *state, diff --git a/block/vmdk.c b/block/vmdk.c index 19298c2..8333afb 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -616,7 +616,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, return vmdk_open_vmdk4(bs, file, flags); break; default: - return -EINVAL; + return -EMEDIUMTYPE; break; } } @@ -718,7 +718,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, } buf[2047] = '\0'; if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { - return -EINVAL; + return -EMEDIUMTYPE; } if (strcmp(ct, "monolithicFlat") && strcmp(ct, "twoGbMaxExtentSparse") && @@ -617,8 +617,13 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type) ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv); if (ret < 0) { - error_report("could not open disk image %s: %s", - file, strerror(-ret)); + if (ret == -EMEDIUMTYPE) { + error_report("could not open disk image %s: not in %s format", + file, drv->format_name); + } else { + error_report("could not open disk image %s: %s", + file, strerror(-ret)); + } goto err; } @@ -642,21 +647,17 @@ void do_commit(Monitor *mon, const QDict *qdict) if (!strcmp(device, "all")) { ret = bdrv_commit_all(); - if (ret == -EBUSY) { - qerror_report(QERR_DEVICE_IN_USE, device); - return; - } } else { bs = bdrv_find(device); if (!bs) { - qerror_report(QERR_DEVICE_NOT_FOUND, device); + monitor_printf(mon, "Device '%s' not found\n", device); return; } ret = bdrv_commit(bs); - if (ret == -EBUSY) { - qerror_report(QERR_DEVICE_IN_USE, device); - return; - } + } + if (ret < 0) { + monitor_printf(mon, "'commit' error for '%s': %s\n", device, + strerror(-ret)); } } @@ -1188,16 +1189,19 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +#define DEFAULT_MIRROR_BUF_SIZE (10 << 20) + void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) { - BlockDriverInfo bdi; BlockDriverState *bs; BlockDriverState 
*source, *target_bs; BlockDriver *proto_drv; @@ -1219,6 +1223,21 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_mode) { mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } + if (!has_granularity) { + granularity = 0; + } + if (!has_buf_size) { + buf_size = DEFAULT_MIRROR_BUF_SIZE; + } + + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } + if (granularity & (granularity - 1)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } bs = bdrv_find(device); if (!bs) { @@ -1259,11 +1278,11 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + bdrv_get_geometry(bs, &size); + size *= 512; if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { /* create new image w/o backing file */ assert(format && drv); - bdrv_get_geometry(bs, &size); - size *= 512; bdrv_img_create(target, format, NULL, NULL, NULL, size, flags, &local_err); } else { @@ -1276,7 +1295,7 @@ void qmp_drive_mirror(const char *device, const char *target, bdrv_img_create(target, format, source->filename, source->drv->format_name, - NULL, -1, flags, &local_err); + NULL, size, flags, &local_err); break; default: abort(); @@ -1288,6 +1307,9 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + /* Mirroring takes care of copy-on-write using the source's backing + * file. + */ target_bs = bdrv_new(""); ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv); @@ -1297,18 +1319,8 @@ void qmp_drive_mirror(const char *device, const char *target, return; } - /* We need a backing file if we will copy parts of a cluster. */ - if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 && - bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) { - ret = bdrv_open_backing_file(target_bs); - if (ret < 0) { - bdrv_delete(target_bs); - error_set(errp, QERR_OPEN_FILE_FAILED, target); - return; - } - } - - mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + mirror_start(bs, target_bs, speed, granularity, buf_size, sync, + on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); @@ -517,7 +517,7 @@ static void qemu_init_sigbus(void) prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } -static void qemu_kvm_eat_signals(CPUArchState *env) +static void qemu_kvm_eat_signals(CPUState *cpu) { struct timespec ts = { 0, 0 }; siginfo_t siginfo; @@ -538,7 +538,7 @@ static void qemu_kvm_eat_signals(CPUArchState *env) switch (r) { case SIGBUS: - if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { + if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { sigbus_reraise(); } break; @@ -560,7 +560,7 @@ static void qemu_init_sigbus(void) { } -static void qemu_kvm_eat_signals(CPUArchState *env) +static void qemu_kvm_eat_signals(CPUState *cpu) { } #endif /* !CONFIG_LINUX */ @@ -727,7 +727,7 @@ static void qemu_kvm_wait_io_event(CPUArchState *env) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - qemu_kvm_eat_signals(env); + qemu_kvm_eat_signals(cpu); qemu_wait_io_event_common(cpu); } diff --git a/docs/virtio-balloon-stats.txt b/docs/virtio-balloon-stats.txt new file mode 100644 index 0000000..f74612f --- /dev/null +++ b/docs/virtio-balloon-stats.txt @@ -0,0 +1,104 @@ +virtio balloon memory statistics +================================ + +The virtio balloon driver supports guest memory statistics reporting. 
These +statistics are available to QEMU users as QOM (QEMU Object Model) device +properties via a polling mechanism. + +Before querying the available stats, clients first have to enable polling. +This is done by writing a time interval value (in seconds) to the +guest-stats-polling-interval property. This value can be: + + > 0 enables polling at the specified interval. If polling is already + enabled, the polling time interval is changed to the new value + + 0 disables polling. Previously polled statistics are still valid and + can be queried. + +Once polling is enabled, the virtio-balloon device in QEMU will start +polling the guest's balloon driver for new stats in the specified time +interval. + +To retrieve those stats, clients have to query the guest-stats property, +which will return a dictionary containing: + + o A key named 'stats', containing all available stats. If the guest + doesn't support a particular stat, or if it couldn't be retrieved, + its value will be -1. Currently, the following stats are supported: + + - stat-swap-in + - stat-swap-out + - stat-major-faults + - stat-minor-faults + - stat-free-memory + - stat-total-memory + + o A key named 'last-update', which contains the last stats update + timestamp in seconds. Since this timestamp is generated by the host, + a buggy guest can't influence its value + +It's also important to note the following: + + - Previously polled statistics remain available even if the polling is + later disabled + + - As noted above, if a guest doesn't support a particular stat its value + will always be -1. However, it's also possible that a guest temporarily + couldn't update one or even all stats. If this happens, just wait for + the next update + + - Polling can be enabled even if the guest doesn't have stats support + or the balloon driver wasn't loaded in the guest. If this is the case + and stats are queried, an error will be returned + + - The polling timer is only re-armed when the guest responds to the + statistics request. This means that if a (buggy) guest never + responds to the request, the timer will never be re-armed, which has + the same effect as disabling polling + +Here are a few examples. QEMU is started with '-balloon virtio', which +generates '/machine/peripheral-anon/device[1]' as the QOM path for the +balloon device. + +Enable polling with a 2-second interval: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 2 } } + +{ "return": {} } + +Change the polling interval to 10 seconds: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 10 } } + +{ "return": {} } + +Get stats: + +{ "execute": "qom-get", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats" } } +{ + "return": { + "stats": { + "stat-swap-out": 0, + "stat-free-memory": 844943360, + "stat-minor-faults": 219028, + "stat-major-faults": 235, + "stat-total-memory": 1044406272, + "stat-swap-in": 0 + }, + "last-update": 1358529861 + } +} + +Disable polling: + +{ "execute": "qom-set", + "arguments": { "path": "/machine/peripheral-anon/device[1]", + "property": "guest-stats-polling-interval", "value": 0 } } + +{ "return": {} } diff --git a/hmp-commands.hx b/hmp-commands.hx index 0934b9b..bdd48f3 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -837,6 +837,45 @@ STEXI @item nmi @var{cpu} @findex nmi Inject an NMI on the given CPU (x86 only). + +ETEXI
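The memchar_read documentation added below states that a request larger than the device's initialization size is adjusted down to it. A minimal self-contained sketch of that clamping rule follows; it models only the documented behaviour, all names are hypothetical, and it is not the CirMemCharDriver implementation:

#include <stddef.h>

/* Toy circular buffer illustrating the documented read semantics. */
typedef struct {
    char *buf;
    size_t capacity;   /* fixed at init time */
    size_t count;      /* bytes currently stored */
    size_t rpos;       /* read position */
} CirMem;

static size_t cirmem_read(CirMem *c, char *out, size_t want)
{
    size_t n = want;
    if (n > c->capacity) {
        n = c->capacity;   /* clamp to the init size, as documented */
    }
    if (n > c->count) {
        n = c->count;      /* and to what is actually stored */
    }
    for (size_t i = 0; i < n; i++) {
        out[i] = c->buf[(c->rpos + i) % c->capacity];
    }
    c->rpos = (c->rpos + n) % c->capacity;
    c->count -= n;
    return n;
}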
+ + { + .name = "memchar_write", + .args_type = "device:s,data:s", + .params = "device data", + .help = "Provide a write interface for CirMemCharDriver. Write " + "'data' to it.", + .mhandler.cmd = hmp_memchar_write, + }, + +STEXI +@item memchar_write @var{device} @var{data} +@findex memchar_write +Provide a write interface for CirMemCharDriver. Write @var{data} +to the char device 'memory'. + +ETEXI + + { + .name = "memchar_read", + .args_type = "device:s,size:i", + .params = "device size", + .help = "Provide a read interface for CirMemCharDriver. Read " + "from it and return data of the given size.", + .mhandler.cmd = hmp_memchar_read, + }, + +STEXI +@item memchar_read @var{device} @var{size} +@findex memchar_read +Provide a read interface for CirMemCharDriver. Read from the char +device 'memory' and return the data. + +@var{size} is the number of bytes to read; it refers to the unencoded +size of the raw data. The request is clamped to the initialization size +of the memchar device if it is larger. + ETEXI { @@ -465,29 +465,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict) return; } - monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20); - if (info->has_mem_swapped_in) { - monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in); - } - if (info->has_mem_swapped_out) { - monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out); - } - if (info->has_major_page_faults) { - monitor_printf(mon, " major_page_faults=%" PRId64, - info->major_page_faults); - } - if (info->has_minor_page_faults) { - monitor_printf(mon, " minor_page_faults=%" PRId64, - info->minor_page_faults); - } - if (info->has_free_mem) { - monitor_printf(mon, " free_mem=%" PRId64, info->free_mem); - } - if (info->has_total_mem) { - monitor_printf(mon, " total_mem=%" PRId64, info->total_mem); - } - - monitor_printf(mon, "\n"); + monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20); qapi_free_BalloonInfo(info); } @@ -684,6 +662,40 @@ void hmp_pmemsave(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &errp); } +void hmp_memchar_write(Monitor *mon, const QDict *qdict) +{ + uint32_t size; + const char *chardev = qdict_get_str(qdict, "device"); + const char *data = qdict_get_str(qdict, "data"); + Error *errp = NULL; + + size = strlen(data); + qmp_memchar_write(chardev, size, data, false, 0, &errp); + + hmp_handle_error(mon, &errp); +} + +void hmp_memchar_read(Monitor *mon, const QDict *qdict) +{ + uint32_t size = qdict_get_int(qdict, "size"); + const char *chardev = qdict_get_str(qdict, "device"); + MemCharRead *meminfo; + Error *errp = NULL; + + meminfo = qmp_memchar_read(chardev, size, false, 0, &errp); + if (errp) { + monitor_printf(mon, "%s\n", error_get_pretty(errp)); + error_free(errp); + return; + } + + if (meminfo->count > 0) { + monitor_printf(mon, "%s\n", meminfo->data); + } + + qapi_free_MemCharRead(meminfo); +} + static void hmp_cont_cb(void *opaque, int err) { if (!err) { @@ -796,7 +808,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ?
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, + true, mode, false, 0, false, 0, false, 0, false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } @@ -43,6 +43,8 @@ void hmp_system_powerdown(Monitor *mon, const QDict *qdict); void hmp_cpu(Monitor *mon, const QDict *qdict); void hmp_memsave(Monitor *mon, const QDict *qdict); void hmp_pmemsave(Monitor *mon, const QDict *qdict); +void hmp_memchar_write(Monitor *mon, const QDict *qdict); +void hmp_memchar_read(Monitor *mon, const QDict *qdict); void hmp_cont(Monitor *mon, const QDict *qdict); void hmp_system_wakeup(Monitor *mon, const QDict *qdict); void hmp_inject_nmi(Monitor *mon, const QDict *qdict); diff --git a/hw/boards.h b/hw/boards.h index 3ff9665..3813d4e 100644 --- a/hw/boards.h +++ b/hw/boards.h @@ -33,6 +33,7 @@ typedef struct QEMUMachine { unsigned int no_serial:1, no_parallel:1, use_virtcon:1, + use_sclp:1, no_floppy:1, no_cdrom:1, no_sdcard:1; diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c index bdcd836..02618f2 100644 --- a/hw/fw_cfg.c +++ b/hw/fw_cfg.c @@ -504,7 +504,6 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC)); fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); - fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); fw_cfg_bootsplash(s); fw_cfg_reboot(s); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 21f50ea..ad0094f 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -241,7 +241,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) if ((pr->cmd & PORT_CMD_FIS_ON) && !s->dev[port].init_d2h_sent) { ahci_init_d2h(&s->dev[port]); - s->dev[port].init_d2h_sent = 1; + s->dev[port].init_d2h_sent = true; } check_cmd(s, port); @@ -494,7 +494,7 @@ static void ahci_reset_port(AHCIState *s, int port) pr->scr_err = 0; pr->scr_act = 0; d->busy_slot = -1; - d->init_d2h_sent = 0; + d->init_d2h_sent = false; ide_state = &s->dev[port].port.ifs[0]; if (!ide_state->bs) { @@ -946,7 +946,7 @@ static int handle_cmd(AHCIState *s, int port, int slot) ide_state->hcyl = 0xeb; debug_print_fis(ide_state->io_buffer, 0x10); ide_state->feature = IDE_FEATURE_DMA; - s->dev[port].done_atapi_packet = 0; + s->dev[port].done_atapi_packet = false; /* XXX send PIO setup FIS */ } @@ -991,7 +991,7 @@ static int ahci_start_transfer(IDEDMA *dma) if (is_atapi && !ad->done_atapi_packet) { /* already prepopulated iobuffer */ - ad->done_atapi_packet = 1; + ad->done_atapi_packet = true; goto out; } @@ -1035,11 +1035,10 @@ out: static void ahci_start_dma(IDEDMA *dma, IDEState *s, BlockDriverCompletionFunc *dma_cb) { +#ifdef DEBUG_AHCI AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); - +#endif DPRINTF(ad->port_no, "\n"); - ad->dma_cb = dma_cb; - ad->dma_status |= BM_STATUS_DMAING; s->io_buffer_offset = 0; dma_cb(s, 0); } @@ -1095,7 +1094,6 @@ static int ahci_dma_set_unit(IDEDMA *dma, int unit) static int ahci_dma_add_status(IDEDMA *dma, int status) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); - ad->dma_status |= status; DPRINTF(ad->port_no, "set status: %x\n", status); if (status & BM_STATUS_INT) { @@ -1114,8 +1112,6 @@ static int ahci_dma_set_inactive(IDEDMA *dma) /* update d2h status */ ahci_write_fis_d2h(ad, NULL); - ad->dma_cb = NULL; - if (!ad->check_bh) { /* maybe we still have something to process, check later */ ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); @@ -1203,6 +1199,82 @@ void 
ahci_reset(AHCIState *s) } } +static const VMStateDescription vmstate_ahci_device = { + .name = "ahci port", + .version_id = 1, + .fields = (VMStateField []) { + VMSTATE_IDE_BUS(port, AHCIDevice), + VMSTATE_UINT32(port_state, AHCIDevice), + VMSTATE_UINT32(finished, AHCIDevice), + VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice), + VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice), + VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice), + VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice), + VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice), + VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice), + VMSTATE_UINT32(port_regs.cmd, AHCIDevice), + VMSTATE_UINT32(port_regs.tfdata, AHCIDevice), + VMSTATE_UINT32(port_regs.sig, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_err, AHCIDevice), + VMSTATE_UINT32(port_regs.scr_act, AHCIDevice), + VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice), + VMSTATE_BOOL(done_atapi_packet, AHCIDevice), + VMSTATE_INT32(busy_slot, AHCIDevice), + VMSTATE_BOOL(init_d2h_sent, AHCIDevice), + VMSTATE_END_OF_LIST() + }, +}; + +static int ahci_state_post_load(void *opaque, int version_id) +{ + int i; + struct AHCIDevice *ad; + AHCIState *s = opaque; + + for (i = 0; i < s->ports; i++) { + ad = &s->dev[i]; + AHCIPortRegs *pr = &ad->port_regs; + + map_page(&ad->lst, + ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024); + map_page(&ad->res_fis, + ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); + /* + * All pending i/o should be flushed out on a migrate. However, + * we might not have cleared the busy_slot since this is done + * in a bh. Also, issue i/o against any slots that are pending. + */ + if ((ad->busy_slot != -1) && + !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) { + pr->cmd_issue &= ~(1 << ad->busy_slot); + ad->busy_slot = -1; + } + check_cmd(s, i); + } + + return 0; +} + +const VMStateDescription vmstate_ahci = { + .name = "ahci", + .version_id = 1, + .post_load = ahci_state_post_load, + .fields = (VMStateField []) { + VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports, + vmstate_ahci_device, AHCIDevice), + VMSTATE_UINT32(control_regs.cap, AHCIState), + VMSTATE_UINT32(control_regs.ghc, AHCIState), + VMSTATE_UINT32(control_regs.irqstatus, AHCIState), + VMSTATE_UINT32(control_regs.impl, AHCIState), + VMSTATE_UINT32(control_regs.version, AHCIState), + VMSTATE_UINT32(idp_index, AHCIState), + VMSTATE_INT32(ports, AHCIState), + VMSTATE_END_OF_LIST() + }, +}; + typedef struct SysbusAHCIState { SysBusDevice busdev; AHCIState ahci; @@ -1211,7 +1283,11 @@ typedef struct SysbusAHCIState { static const VMStateDescription vmstate_sysbus_ahci = { .name = "sysbus-ahci", - .unmigratable = 1, + .unmigratable = 1, /* Still buggy under I/O load */ + .fields = (VMStateField []) { + VMSTATE_AHCI(ahci, AHCIPCIState), + VMSTATE_END_OF_LIST() + }, }; static void sysbus_ahci_reset(DeviceState *dev) diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index 1200a56..85f37fe 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -281,11 +281,9 @@ struct AHCIDevice { QEMUBH *check_bh; uint8_t *lst; uint8_t *res_fis; - int dma_status; - int done_atapi_packet; - int busy_slot; - int init_d2h_sent; - BlockDriverCompletionFunc *dma_cb; + bool done_atapi_packet; + int32_t busy_slot; + bool init_d2h_sent; AHCICmdHdr *cur_cmd; NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; }; @@ -297,7 +295,7 @@ typedef struct AHCIState { MemoryRegion idp; /* Index-Data Pair I/O port space */ unsigned idp_offset; /* Offset of index in I/O 
port space */ uint32_t idp_index; /* Current IDP index */ - int ports; + int32_t ports; qemu_irq irq; DMAContext *dma; } AHCIState; @@ -307,6 +305,16 @@ typedef struct AHCIPCIState { AHCIState ahci; } AHCIPCIState; +extern const VMStateDescription vmstate_ahci; + +#define VMSTATE_AHCI(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(AHCIState), \ + .vmsd = &vmstate_ahci, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, AHCIState), \ +} + typedef struct NCQFrame { uint8_t fis_type; uint8_t c; diff --git a/hw/ide/core.c b/hw/ide/core.c index 14ad079..3743dc3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1149,8 +1149,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) } ide_set_irq(s->bus); break; + case WIN_VERIFY_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_VERIFY: case WIN_VERIFY_ONCE: /* do sector number check ? */ @@ -1158,8 +1160,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); break; + case WIN_READ_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READ: case WIN_READ_ONCE: if (s->drive_kind == IDE_CD) { @@ -1173,8 +1177,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->req_nb_sectors = 1; ide_sector_read(s); break; + case WIN_WRITE_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_WRITE: case WIN_WRITE_ONCE: case CFA_WRITE_SECT_WO_ERASE: @@ -1189,8 +1195,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_transfer_start(s, s->io_buffer, 512, ide_sector_write); s->media_changed = 1; break; + case WIN_MULTREAD_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_MULTREAD: if (!s->bs) { goto abort_cmd; @@ -1202,8 +1210,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) s->req_nb_sectors = s->mult_sectors; ide_sector_read(s); break; + case WIN_MULTWRITE_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_MULTWRITE: case CFA_WRITE_MULTI_WO_ERASE: if (!s->bs) { @@ -1222,8 +1232,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write); s->media_changed = 1; break; + case WIN_READDMA_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READDMA: case WIN_READDMA_ONCE: if (!s->bs) { @@ -1232,8 +1244,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_cmd_lba48_transform(s, lba48); ide_sector_start_dma(s, IDE_DMA_READ); break; + case WIN_WRITEDMA_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_WRITEDMA: case WIN_WRITEDMA_ONCE: if (!s->bs) { @@ -1243,14 +1257,17 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) ide_sector_start_dma(s, IDE_DMA_WRITE); s->media_changed = 1; break; + case WIN_READ_NATIVE_MAX_EXT: - lba48 = 1; + lba48 = 1; + /* fall through */ case WIN_READ_NATIVE_MAX: ide_cmd_lba48_transform(s, lba48); ide_set_sector(s, s->nb_sectors - 1); s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); break; + case WIN_CHECKPOWERMODE1: case WIN_CHECKPOWERMODE2: s->error = 0; diff --git a/hw/ide/ich.c b/hw/ide/ich.c index 1fb803d..cc30adc 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -79,9 +79,15 @@ #define ICH9_IDP_INDEX 0x10 #define ICH9_IDP_INDEX_LOG2 0x04 -static const VMStateDescription vmstate_ahci = { - .name = "ahci", - .unmigratable = 1, +static const VMStateDescription vmstate_ich9_ahci = { + .name = "ich9_ahci", + .unmigratable = 1, /* Still buggy under I/O load */ + .version_id = 1, + .fields = (VMStateField []) { + VMSTATE_PCI_DEVICE(card, AHCIPCIState), + VMSTATE_AHCI(ahci, AHCIPCIState), + VMSTATE_END_OF_LIST() + }, }; static 
void pci_ich9_reset(DeviceState *dev) @@ -152,7 +158,7 @@ static void ich_ahci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82801IR; k->revision = 0x02; k->class_id = PCI_CLASS_STORAGE_SATA; - dc->vmsd = &vmstate_ahci; + dc->vmsd = &vmstate_ich9_ahci; dc->reset = pci_ich9_reset; } diff --git a/hw/m25p80.c b/hw/m25p80.c index d392656..788c196 100644 --- a/hw/m25p80.c +++ b/hw/m25p80.c @@ -358,6 +358,8 @@ static void complete_collecting_data(Flash *s) s->cur_addr |= s->data[1] << 8; s->cur_addr |= s->data[2]; + s->state = STATE_IDLE; + switch (s->cmd_in_progress) { case DPP: case QPP: @@ -551,6 +551,18 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) return index; } +/* Calculates the limit to CPU APIC ID values + * + * This function returns the limit for the APIC ID value, so that all + * CPU APIC IDs are < pc_apic_id_limit(). + * + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + */ +static unsigned int pc_apic_id_limit(unsigned int max_cpus) +{ + return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; +} + static void *bochs_bios_init(void) { void *fw_cfg; @@ -558,9 +570,24 @@ static void *bochs_bios_init(void) size_t smbios_len; uint64_t *numa_fw_cfg; int i, j; + unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); - + /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: + * + * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug + * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC + * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the + * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS + * may see". + * + * So, this means we must not use max_cpus, here, but the maximum possible + * APIC ID value, plus one. + * + * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is + * the APIC ID, not the "CPU index" + */ + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, @@ -579,21 +606,24 @@ static void *bochs_bios_init(void) * of nodes, one word for each VCPU->node and one word for each node to * hold the amount of memory. 
*/ - numa_fw_cfg = g_new0(uint64_t, 1 + max_cpus + nb_numa_nodes); + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); for (i = 0; i < max_cpus; i++) { + unsigned int apic_id = x86_cpu_apic_id_from_index(i); + assert(apic_id < apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { if (test_bit(i, node_cpumask[j])) { - numa_fw_cfg[i + 1] = cpu_to_le64(j); + numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]); + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + max_cpus + nb_numa_nodes) * sizeof(*numa_fw_cfg)); + (1 + apic_id_limit + nb_numa_nodes) * + sizeof(*numa_fw_cfg)); return fw_cfg; } diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 0a6923d..b9a9b2e 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -235,10 +235,18 @@ static void pc_init_pci(QEMUMachineInitArgs *args) static void pc_init_pci_1_3(QEMUMachineInitArgs *args) { - enable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init_pci(args); } +/* PC machine init function for pc-0.14 to pc-1.2 */ +static void pc_init_pci_1_2(QEMUMachineInitArgs *args) +{ + disable_kvm_pv_eoi(); + pc_init_pci_1_3(args); +} + +/* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args) { ram_addr_t ram_size = args->ram_size; @@ -247,6 +255,8 @@ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args) const char *kernel_cmdline = args->kernel_cmdline; const char *initrd_filename = args->initrd_filename; const char *boot_device = args->boot_device; + disable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init1(get_system_memory(), get_system_io(), ram_size, boot_device, @@ -264,6 +274,8 @@ static void pc_init_isa(QEMUMachineInitArgs *args) const char *boot_device = args->boot_device; if (cpu_model == NULL) cpu_model = "486"; + disable_kvm_pv_eoi(); + enable_compat_apic_id_mode(); pc_init1(get_system_memory(), get_system_io(), ram_size, boot_device, @@ -286,7 +298,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = { .name = "pc-i440fx-1.4", .alias = "pc", .desc = "Standard PC (i440FX + PIIX, 1996)", - .init = pc_init_pci_1_3, + .init = pc_init_pci, .max_cpus = 255, .is_default = 1, DEFAULT_MACHINE_OPTIONS, @@ -342,7 +354,7 @@ static QEMUMachine pc_machine_v1_3 = { static QEMUMachine pc_machine_v1_2 = { .name = "pc-1.2", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_2, @@ -386,7 +398,7 @@ static QEMUMachine pc_machine_v1_2 = { static QEMUMachine pc_machine_v1_1 = { .name = "pc-1.1", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_1, @@ -422,7 +434,7 @@ static QEMUMachine pc_machine_v1_1 = { static QEMUMachine pc_machine_v1_0 = { .name = "pc-1.0", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_1_0, @@ -438,7 +450,7 @@ static QEMUMachine pc_machine_v1_0 = { static QEMUMachine pc_machine_v0_15 = { .name = "pc-0.15", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_0_15, @@ -471,7 +483,7 @@ static QEMUMachine pc_machine_v0_15 = { static QEMUMachine pc_machine_v0_14 = { .name = "pc-0.14", .desc = 
"Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_1_2, .max_cpus = 255, .compat_props = (GlobalProperty[]) { PC_COMPAT_0_14, diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 6de810b..065ea87 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -413,6 +413,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 9ed303a..2778e45 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -299,6 +299,7 @@ static void ppc_heathrow_init(QEMUMachineInitArgs *args) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 1b40c2e..9f2f419 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -1,8 +1,9 @@ obj-y = s390-virtio-bus.o s390-virtio.o - -obj-y := $(addprefix ../,$(obj-y)) obj-y += s390-virtio-hcall.o obj-y += sclp.o obj-y += event-facility.o obj-y += sclpquiesce.o sclpconsole.o obj-y += ipl.o +obj-y += css.o +obj-y += s390-virtio-ccw.o +obj-y += virtio-ccw.o diff --git a/hw/s390x/css.c b/hw/s390x/css.c new file mode 100644 index 0000000..3244201 --- /dev/null +++ b/hw/s390x/css.c @@ -0,0 +1,1277 @@ +/* + * Channel subsystem base support. + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include <hw/qdev.h> +#include "qemu/bitops.h" +#include "cpu.h" +#include "ioinst.h" +#include "css.h" +#include "trace.h" + +typedef struct CrwContainer { + CRW crw; + QTAILQ_ENTRY(CrwContainer) sibling; +} CrwContainer; + +typedef struct ChpInfo { + uint8_t in_use; + uint8_t type; + uint8_t is_virtual; +} ChpInfo; + +typedef struct SubchSet { + SubchDev *sch[MAX_SCHID + 1]; + unsigned long schids_used[BITS_TO_LONGS(MAX_SCHID + 1)]; + unsigned long devnos_used[BITS_TO_LONGS(MAX_SCHID + 1)]; +} SubchSet; + +typedef struct CssImage { + SubchSet *sch_set[MAX_SSID + 1]; + ChpInfo chpids[MAX_CHPID + 1]; +} CssImage; + +typedef struct ChannelSubSys { + QTAILQ_HEAD(, CrwContainer) pending_crws; + bool do_crw_mchk; + bool crws_lost; + uint8_t max_cssid; + uint8_t max_ssid; + bool chnmon_active; + uint64_t chnmon_area; + CssImage *css[MAX_CSSID + 1]; + uint8_t default_cssid; +} ChannelSubSys; + +static ChannelSubSys *channel_subsys; + +int css_create_css_image(uint8_t cssid, bool default_image) +{ + trace_css_new_image(cssid, default_image ? 
"(default)" : ""); + if (cssid > MAX_CSSID) { + return -EINVAL; + } + if (channel_subsys->css[cssid]) { + return -EBUSY; + } + channel_subsys->css[cssid] = g_malloc0(sizeof(CssImage)); + if (default_image) { + channel_subsys->default_cssid = cssid; + } + return 0; +} + +static uint16_t css_build_subchannel_id(SubchDev *sch) +{ + if (channel_subsys->max_cssid > 0) { + return (sch->cssid << 8) | (1 << 3) | (sch->ssid << 1) | 1; + } + return (sch->ssid << 1) | 1; +} + +static void css_inject_io_interrupt(SubchDev *sch) +{ + S390CPU *cpu = s390_cpu_addr2state(0); + uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; + + trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid, + sch->curr_status.pmcw.intparm, isc, ""); + s390_io_interrupt(cpu, + css_build_subchannel_id(sch), + sch->schid, + sch->curr_status.pmcw.intparm, + (0x80 >> isc) << 24); +} + +void css_conditional_io_interrupt(SubchDev *sch) +{ + /* + * If the subchannel is not currently status pending, make it pending + * with alert status. + */ + if (!(sch->curr_status.scsw.ctrl & SCSW_STCTL_STATUS_PEND)) { + S390CPU *cpu = s390_cpu_addr2state(0); + uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; + + trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid, + sch->curr_status.pmcw.intparm, isc, + "(unsolicited)"); + sch->curr_status.scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; + sch->curr_status.scsw.ctrl |= + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + /* Inject an I/O interrupt. */ + s390_io_interrupt(cpu, + css_build_subchannel_id(sch), + sch->schid, + sch->curr_status.pmcw.intparm, + (0x80 >> isc) << 24); + } +} + +static void sch_handle_clear_func(SubchDev *sch) +{ + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int path; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + /* Reset values prior to 'issueing the clear signal'. */ + p->lpum = 0; + p->pom = 0xff; + s->flags &= ~SCSW_FLAGS_MASK_PNO; + + /* We always 'attempt to issue the clear signal', and we always succeed. */ + sch->orb = NULL; + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + s->ctrl &= ~SCSW_ACTL_CLEAR_PEND; + s->ctrl |= SCSW_STCTL_STATUS_PEND; + + s->dstat = 0; + s->cstat = 0; + p->lpum = path; + +} + +static void sch_handle_halt_func(SubchDev *sch) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int path; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + /* We always 'attempt to issue the halt signal', and we always succeed. 
*/ + sch->orb = NULL; + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + s->ctrl &= ~SCSW_ACTL_HALT_PEND; + s->ctrl |= SCSW_STCTL_STATUS_PEND; + + if ((s->ctrl & (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) || + !((s->ctrl & SCSW_ACTL_START_PEND) || + (s->ctrl & SCSW_ACTL_SUSP))) { + s->dstat = SCSW_DSTAT_DEVICE_END; + } + s->cstat = 0; + p->lpum = path; + +} + +static void copy_sense_id_to_guest(SenseId *dest, SenseId *src) +{ + int i; + + dest->reserved = src->reserved; + dest->cu_type = cpu_to_be16(src->cu_type); + dest->cu_model = src->cu_model; + dest->dev_type = cpu_to_be16(src->dev_type); + dest->dev_model = src->dev_model; + dest->unused = src->unused; + for (i = 0; i < ARRAY_SIZE(dest->ciw); i++) { + dest->ciw[i].type = src->ciw[i].type; + dest->ciw[i].command = src->ciw[i].command; + dest->ciw[i].count = cpu_to_be16(src->ciw[i].count); + } +} + +static CCW1 copy_ccw_from_guest(hwaddr addr) +{ + CCW1 tmp; + CCW1 ret; + + cpu_physical_memory_read(addr, &tmp, sizeof(tmp)); + ret.cmd_code = tmp.cmd_code; + ret.flags = tmp.flags; + ret.count = be16_to_cpu(tmp.count); + ret.cda = be32_to_cpu(tmp.cda); + + return ret; +} + +static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr) +{ + int ret; + bool check_len; + int len; + CCW1 ccw; + + if (!ccw_addr) { + return -EIO; + } + + ccw = copy_ccw_from_guest(ccw_addr); + + /* Check for invalid command codes. */ + if ((ccw.cmd_code & 0x0f) == 0) { + return -EINVAL; + } + if (((ccw.cmd_code & 0x0f) == CCW_CMD_TIC) && + ((ccw.cmd_code & 0xf0) != 0)) { + return -EINVAL; + } + + if (ccw.flags & CCW_FLAG_SUSPEND) { + return -EINPROGRESS; + } + + check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); + + /* Look at the command. */ + switch (ccw.cmd_code) { + case CCW_CMD_NOOP: + /* Nothing to do. */ + ret = 0; + break; + case CCW_CMD_BASIC_SENSE: + if (check_len) { + if (ccw.count != sizeof(sch->sense_data)) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, sizeof(sch->sense_data)); + cpu_physical_memory_write(ccw.cda, sch->sense_data, len); + sch->curr_status.scsw.count = ccw.count - len; + memset(sch->sense_data, 0, sizeof(sch->sense_data)); + ret = 0; + break; + case CCW_CMD_SENSE_ID: + { + SenseId sense_id; + + copy_sense_id_to_guest(&sense_id, &sch->id); + /* Sense ID information is device specific. */ + if (check_len) { + if (ccw.count != sizeof(sense_id)) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, sizeof(sense_id)); + /* + * Only indicate 0xff in the first sense byte if we actually + * have enough space to store at least bytes 0-3. + */ + if (len >= 4) { + sense_id.reserved = 0xff; + } else { + sense_id.reserved = 0; + } + cpu_physical_memory_write(ccw.cda, &sense_id, len); + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + break; + } + case CCW_CMD_TIC: + if (sch->last_cmd_valid && (sch->last_cmd.cmd_code == CCW_CMD_TIC)) { + ret = -EINVAL; + break; + } + if (ccw.flags & (CCW_FLAG_CC | CCW_FLAG_DC)) { + ret = -EINVAL; + break; + } + sch->channel_prog = ccw.cda; + ret = -EAGAIN; + break; + default: + if (sch->ccw_cb) { + /* Handle device specific commands. 
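Command codes the common interpreter does not handle are delegated to the transport's callback; virtio-ccw installs virtio_ccw_cb() here for its virtio-specific channel commands.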
*/ + ret = sch->ccw_cb(sch, ccw); + } else { + ret = -ENOSYS; + } + break; + } + sch->last_cmd = ccw; + sch->last_cmd_valid = true; + if (ret == 0) { + if (ccw.flags & CCW_FLAG_CC) { + sch->channel_prog += 8; + ret = -EAGAIN; + } + } + + return ret; +} + +static void sch_handle_start_func(SubchDev *sch) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + ORB *orb = sch->orb; + int path; + int ret; + + /* Path management: In our simple css, we always choose the only path. */ + path = 0x80; + + if (!(s->ctrl & SCSW_ACTL_SUSP)) { + /* Look at the orb and try to execute the channel program. */ + p->intparm = orb->intparm; + if (!(orb->lpm & path)) { + /* Generate a deferred cc 3 condition. */ + s->flags |= SCSW_FLAGS_MASK_CC; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); + return; + } + } else { + s->ctrl &= ~(SCSW_ACTL_SUSP | SCSW_ACTL_RESUME_PEND); + } + sch->last_cmd_valid = false; + do { + ret = css_interpret_ccw(sch, sch->channel_prog); + switch (ret) { + case -EAGAIN: + /* ccw chain, continue processing */ + break; + case 0: + /* success */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_STATUS_PEND; + s->dstat = SCSW_DSTAT_CHANNEL_END | SCSW_DSTAT_DEVICE_END; + break; + case -ENOSYS: + /* unsupported command, generate unit check (command reject) */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->dstat = SCSW_DSTAT_UNIT_CHECK; + /* Set sense bit 0 in ecw0. */ + sch->sense_data[0] = 0x80; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EFAULT: + /* memory problem, generate channel data check */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_DATA_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EBUSY: + /* subchannel busy, generate deferred cc 1 */ + s->flags &= ~SCSW_FLAGS_MASK_CC; + s->flags |= (1 << 8); + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + case -EINPROGRESS: + /* channel program has been suspended */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->ctrl |= SCSW_ACTL_SUSP; + break; + default: + /* error, generate channel program check */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_PROG_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + break; + } + } while (ret == -EAGAIN); + +} + +/* + * On real machines, this would run asynchronously to the main vcpus. + * We might want to make some parts of the ssch handling (interpreting + * read/writes) asynchronous later on if we start supporting more than + * our current very simple devices. + */ +static void do_subchannel_work(SubchDev *sch) +{ + + SCSW *s = &sch->curr_status.scsw; + + if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) { + sch_handle_clear_func(sch); + } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) { + sch_handle_halt_func(sch); + } else if (s->ctrl & SCSW_FCTL_START_FUNC) { + sch_handle_start_func(sch); + } else { + /* Cannot happen. 
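This function is only entered after csch/hsch/ssch/rsch has set the corresponding function control bit, so one of the branches above always matches.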
*/ + return; + } + css_inject_io_interrupt(sch); +} + +static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) +{ + int i; + + dest->intparm = cpu_to_be32(src->intparm); + dest->flags = cpu_to_be16(src->flags); + dest->devno = cpu_to_be16(src->devno); + dest->lpm = src->lpm; + dest->pnom = src->pnom; + dest->lpum = src->lpum; + dest->pim = src->pim; + dest->mbi = cpu_to_be16(src->mbi); + dest->pom = src->pom; + dest->pam = src->pam; + for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { + dest->chpid[i] = src->chpid[i]; + } + dest->chars = cpu_to_be32(src->chars); +} + +static void copy_scsw_to_guest(SCSW *dest, const SCSW *src) +{ + dest->flags = cpu_to_be16(src->flags); + dest->ctrl = cpu_to_be16(src->ctrl); + dest->cpa = cpu_to_be32(src->cpa); + dest->dstat = src->dstat; + dest->cstat = src->cstat; + dest->count = cpu_to_be16(src->count); +} + +static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) +{ + int i; + + copy_pmcw_to_guest(&dest->pmcw, &src->pmcw); + copy_scsw_to_guest(&dest->scsw, &src->scsw); + dest->mba = cpu_to_be64(src->mba); + for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { + dest->mda[i] = src->mda[i]; + } +} + +int css_do_stsch(SubchDev *sch, SCHIB *schib) +{ + /* Use current status. */ + copy_schib_to_guest(schib, &sch->curr_status); + return 0; +} + +static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src) +{ + int i; + + dest->intparm = be32_to_cpu(src->intparm); + dest->flags = be16_to_cpu(src->flags); + dest->devno = be16_to_cpu(src->devno); + dest->lpm = src->lpm; + dest->pnom = src->pnom; + dest->lpum = src->lpum; + dest->pim = src->pim; + dest->mbi = be16_to_cpu(src->mbi); + dest->pom = src->pom; + dest->pam = src->pam; + for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { + dest->chpid[i] = src->chpid[i]; + } + dest->chars = be32_to_cpu(src->chars); +} + +static void copy_scsw_from_guest(SCSW *dest, const SCSW *src) +{ + dest->flags = be16_to_cpu(src->flags); + dest->ctrl = be16_to_cpu(src->ctrl); + dest->cpa = be32_to_cpu(src->cpa); + dest->dstat = src->dstat; + dest->cstat = src->cstat; + dest->count = be16_to_cpu(src->count); +} + +static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src) +{ + int i; + + copy_pmcw_from_guest(&dest->pmcw, &src->pmcw); + copy_scsw_from_guest(&dest->scsw, &src->scsw); + dest->mba = be64_to_cpu(src->mba); + for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { + dest->mda[i] = src->mda[i]; + } +} + +int css_do_msch(SubchDev *sch, SCHIB *orig_schib) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + SCHIB schib; + + if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_DNV)) { + ret = 0; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & + (SCSW_FCTL_START_FUNC|SCSW_FCTL_HALT_FUNC|SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + copy_schib_from_guest(&schib, orig_schib); + /* Only update the program-modifiable fields. 
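That is: intparm, the ISC/ENA/LM/MME/MP flags, lpm, mbi, pom, the MBFC/CSENSE characteristics and the measurement block address; all other SCHIB fields keep their current values.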
*/ + p->intparm = schib.pmcw.intparm; + p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP); + p->flags |= schib.pmcw.flags & + (PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP); + p->lpm = schib.pmcw.lpm; + p->mbi = schib.pmcw.mbi; + p->pom = schib.pmcw.pom; + p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE); + p->chars |= schib.pmcw.chars & + (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE); + sch->curr_status.mba = schib.mba; + + ret = 0; + +out: + return ret; +} + +int css_do_xsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (!(s->ctrl & SCSW_CTRL_MASK_FCTL) || + ((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) || + (!(s->ctrl & + (SCSW_ACTL_RESUME_PEND | SCSW_ACTL_START_PEND | SCSW_ACTL_SUSP))) || + (s->ctrl & SCSW_ACTL_SUBCH_ACTIVE)) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & SCSW_CTRL_MASK_STCTL) { + ret = -EBUSY; + goto out; + } + + /* Cancel the current operation. */ + s->ctrl &= ~(SCSW_FCTL_START_FUNC | + SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_SUSP); + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; + s->dstat = 0; + s->cstat = 0; + ret = 0; + +out: + return ret; +} + +int css_do_csch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + /* Trigger the clear function. */ + s->ctrl &= ~(SCSW_CTRL_MASK_FCTL | SCSW_CTRL_MASK_ACTL); + s->ctrl |= SCSW_FCTL_CLEAR_FUNC | SCSW_ACTL_CLEAR_PEND; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +int css_do_hsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_STATUS_PEND) || + (s->ctrl & (SCSW_STCTL_PRIMARY | + SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT))) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + /* Trigger the halt function. */ + s->ctrl |= SCSW_FCTL_HALT_FUNC; + s->ctrl &= ~SCSW_FCTL_START_FUNC; + if (((s->ctrl & SCSW_CTRL_MASK_ACTL) == + (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) && + ((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_INTERMEDIATE)) { + s->ctrl &= ~SCSW_STCTL_STATUS_PEND; + } + s->ctrl |= SCSW_ACTL_HALT_PEND; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +static void css_update_chnmon(SubchDev *sch) +{ + if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_MME)) { + /* Not active. */ + return; + } + /* The counter is conveniently located at the beginning of the struct. */ + if (sch->curr_status.pmcw.chars & PMCW_CHARS_MASK_MBFC) { + /* Format 1, per-subchannel area. */ + uint32_t count; + + count = ldl_phys(sch->curr_status.mba); + count++; + stl_phys(sch->curr_status.mba, count); + } else { + /* Format 0, global area. 
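The guest registers one measurement area for all subchannels via SCHM; each subchannel's 32-byte block starts at offset mbi << 5, and only the halfword ssch/rsch count at its beginning is updated here.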
*/ + uint32_t offset; + uint16_t count; + + offset = sch->curr_status.pmcw.mbi << 5; + count = lduw_phys(channel_subsys->chnmon_area + offset); + count++; + stw_phys(channel_subsys->chnmon_area + offset, count); + } +} + +int css_do_ssch(SubchDev *sch, ORB *orb) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (s->ctrl & (SCSW_FCTL_START_FUNC | + SCSW_FCTL_HALT_FUNC | + SCSW_FCTL_CLEAR_FUNC)) { + ret = -EBUSY; + goto out; + } + + /* If monitoring is active, update counter. */ + if (channel_subsys->chnmon_active) { + css_update_chnmon(sch); + } + sch->orb = orb; + sch->channel_prog = orb->cpa; + /* Trigger the start function. */ + s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND); + s->flags &= ~SCSW_FLAGS_MASK_PNO; + + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +static void copy_irb_to_guest(IRB *dest, const IRB *src) +{ + int i; + + copy_scsw_to_guest(&dest->scsw, &src->scsw); + + for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { + dest->esw[i] = cpu_to_be32(src->esw[i]); + } + for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { + dest->ecw[i] = cpu_to_be32(src->ecw[i]); + } + for (i = 0; i < ARRAY_SIZE(dest->emw); i++) { + dest->emw[i] = cpu_to_be32(src->emw[i]); + } +} + +int css_do_tsch(SubchDev *sch, IRB *target_irb) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + uint16_t stctl; + uint16_t fctl; + uint16_t actl; + IRB irb; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = 3; + goto out; + } + + stctl = s->ctrl & SCSW_CTRL_MASK_STCTL; + fctl = s->ctrl & SCSW_CTRL_MASK_FCTL; + actl = s->ctrl & SCSW_CTRL_MASK_ACTL; + + /* Prepare the irb for the guest. */ + memset(&irb, 0, sizeof(IRB)); + + /* Copy scsw from current status. */ + memcpy(&irb.scsw, s, sizeof(SCSW)); + if (stctl & SCSW_STCTL_STATUS_PEND) { + if (s->cstat & (SCSW_CSTAT_DATA_CHECK | + SCSW_CSTAT_CHN_CTRL_CHK | + SCSW_CSTAT_INTF_CTRL_CHK)) { + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; + irb.esw[0] = 0x04804000; + } else { + irb.esw[0] = 0x00800000; + } + /* If a unit check is pending, copy sense data. */ + if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && + (p->chars & PMCW_CHARS_MASK_CSENSE)) { + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; + memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); + irb.esw[1] = 0x02000000 | (sizeof(sch->sense_data) << 8); + } + } + /* Store the irb to the guest. */ + copy_irb_to_guest(target_irb, &irb); + + /* Clear conditions on subchannel, if applicable. 
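TSCH both retrieves and acknowledges status: once the IRB has been stored, status pending is cleared and, depending on what completed, the function and activity control bits are reset as well.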
*/ + if (stctl & SCSW_STCTL_STATUS_PEND) { + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + if ((stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) || + ((fctl & SCSW_FCTL_HALT_FUNC) && + (actl & SCSW_ACTL_SUSP))) { + s->ctrl &= ~SCSW_CTRL_MASK_FCTL; + } + if (stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) { + s->flags &= ~SCSW_FLAGS_MASK_PNO; + s->ctrl &= ~(SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_HALT_PEND | + SCSW_ACTL_CLEAR_PEND | + SCSW_ACTL_SUSP); + } else { + if ((actl & SCSW_ACTL_SUSP) && + (fctl & SCSW_FCTL_START_FUNC)) { + s->flags &= ~SCSW_FLAGS_MASK_PNO; + if (fctl & SCSW_FCTL_HALT_FUNC) { + s->ctrl &= ~(SCSW_ACTL_RESUME_PEND | + SCSW_ACTL_START_PEND | + SCSW_ACTL_HALT_PEND | + SCSW_ACTL_CLEAR_PEND | + SCSW_ACTL_SUSP); + } else { + s->ctrl &= ~SCSW_ACTL_RESUME_PEND; + } + } + } + /* Clear pending sense data. */ + if (p->chars & PMCW_CHARS_MASK_CSENSE) { + memset(sch->sense_data, 0, sizeof(sch->sense_data)); + } + } + + ret = ((stctl & SCSW_STCTL_STATUS_PEND) == 0); + +out: + return ret; +} + +static void copy_crw_to_guest(CRW *dest, const CRW *src) +{ + dest->flags = cpu_to_be16(src->flags); + dest->rsid = cpu_to_be16(src->rsid); +} + +int css_do_stcrw(CRW *crw) +{ + CrwContainer *crw_cont; + int ret; + + crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws); + if (crw_cont) { + QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling); + copy_crw_to_guest(crw, &crw_cont->crw); + g_free(crw_cont); + ret = 0; + } else { + /* List was empty, turn crw machine checks on again. */ + memset(crw, 0, sizeof(*crw)); + channel_subsys->do_crw_mchk = true; + ret = 1; + } + + return ret; +} + +int css_do_tpi(IOIntCode *int_code, int lowcore) +{ + /* No pending interrupts for !KVM. */ + return 0; +} + +int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid, + int rfmt, void *buf) +{ + int i, desc_size; + uint32_t words[8]; + uint32_t chpid_type_word; + CssImage *css; + + if (!m && !cssid) { + css = channel_subsys->css[channel_subsys->default_cssid]; + } else { + css = channel_subsys->css[cssid]; + } + if (!css) { + return 0; + } + desc_size = 0; + for (i = f_chpid; i <= l_chpid; i++) { + if (css->chpids[i].in_use) { + chpid_type_word = 0x80000000 | (css->chpids[i].type << 8) | i; + if (rfmt == 0) { + words[0] = cpu_to_be32(chpid_type_word); + words[1] = 0; + memcpy(buf + desc_size, words, 8); + desc_size += 8; + } else if (rfmt == 1) { + words[0] = cpu_to_be32(chpid_type_word); + words[1] = 0; + words[2] = 0; + words[3] = 0; + words[4] = 0; + words[5] = 0; + words[6] = 0; + words[7] = 0; + memcpy(buf + desc_size, words, 32); + desc_size += 32; + } + } + } + return desc_size; +} + +void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo) +{ + /* dct is currently ignored (not really meaningful for our devices) */ + /* TODO: Don't ignore mbk. */ + if (update && !channel_subsys->chnmon_active) { + /* Enable measuring. */ + channel_subsys->chnmon_area = mbo; + channel_subsys->chnmon_active = true; + } + if (!update && channel_subsys->chnmon_active) { + /* Disable measuring. 
*/ + channel_subsys->chnmon_area = 0; + channel_subsys->chnmon_active = false; + } +} + +int css_do_rsch(SubchDev *sch) +{ + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + int ret; + + if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + ret = -ENODEV; + goto out; + } + + if (s->ctrl & SCSW_STCTL_STATUS_PEND) { + ret = -EINPROGRESS; + goto out; + } + + if (((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) || + (s->ctrl & SCSW_ACTL_RESUME_PEND) || + (!(s->ctrl & SCSW_ACTL_SUSP))) { + ret = -EINVAL; + goto out; + } + + /* If monitoring is active, update counter. */ + if (channel_subsys->chnmon_active) { + css_update_chnmon(sch); + } + + s->ctrl |= SCSW_ACTL_RESUME_PEND; + do_subchannel_work(sch); + ret = 0; + +out: + return ret; +} + +int css_do_rchp(uint8_t cssid, uint8_t chpid) +{ + uint8_t real_cssid; + + if (cssid > channel_subsys->max_cssid) { + return -EINVAL; + } + if (channel_subsys->max_cssid == 0) { + real_cssid = channel_subsys->default_cssid; + } else { + real_cssid = cssid; + } + if (!channel_subsys->css[real_cssid]) { + return -EINVAL; + } + + if (!channel_subsys->css[real_cssid]->chpids[chpid].in_use) { + return -ENODEV; + } + + if (!channel_subsys->css[real_cssid]->chpids[chpid].is_virtual) { + fprintf(stderr, + "rchp unsupported for non-virtual chpid %x.%02x!\n", + real_cssid, chpid); + return -ENODEV; + } + + /* We don't really use a channel path, so we're done here. */ + css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, + channel_subsys->max_cssid > 0 ? 1 : 0, chpid); + if (channel_subsys->max_cssid > 0) { + css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8); + } + return 0; +} + +bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + SubchSet *set; + + if (cssid > MAX_CSSID || ssid > MAX_SSID || !channel_subsys->css[cssid] || + !channel_subsys->css[cssid]->sch_set[ssid]) { + return true; + } + set = channel_subsys->css[cssid]->sch_set[ssid]; + return schid > find_last_bit(set->schids_used, + MAX_SCHID + 1); +} + +static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) +{ + CssImage *css; + + trace_css_chpid_add(cssid, chpid, type); + if (cssid > MAX_CSSID) { + return -EINVAL; + } + css = channel_subsys->css[cssid]; + if (!css) { + return -EINVAL; + } + if (css->chpids[chpid].in_use) { + return -EEXIST; + } + css->chpids[chpid].in_use = 1; + css->chpids[chpid].type = type; + css->chpids[chpid].is_virtual = 1; + + css_generate_chp_crws(cssid, chpid); + + return 0; +} + +void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type) +{ + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int i; + CssImage *css = channel_subsys->css[sch->cssid]; + + assert(css != NULL); + memset(p, 0, sizeof(PMCW)); + p->flags |= PMCW_FLAGS_MASK_DNV; + p->devno = sch->devno; + /* single path */ + p->pim = 0x80; + p->pom = 0xff; + p->pam = 0x80; + p->chpid[0] = chpid; + if (!css->chpids[chpid].in_use) { + css_add_virtual_chpid(sch->cssid, chpid, type); + } + + memset(s, 0, sizeof(SCSW)); + sch->curr_status.mba = 0; + for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) { + sch->curr_status.mda[i] = 0; + } +} + +SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + uint8_t real_cssid; + + real_cssid = (!m && (cssid == 0)) ? 
channel_subsys->default_cssid : cssid; + + if (!channel_subsys->css[real_cssid]) { + return NULL; + } + + if (!channel_subsys->css[real_cssid]->sch_set[ssid]) { + return NULL; + } + + return channel_subsys->css[real_cssid]->sch_set[ssid]->sch[schid]; +} + +bool css_subch_visible(SubchDev *sch) +{ + if (sch->ssid > channel_subsys->max_ssid) { + return false; + } + + if (sch->cssid != channel_subsys->default_cssid) { + return (channel_subsys->max_cssid > 0); + } + + return true; +} + +bool css_present(uint8_t cssid) +{ + return (channel_subsys->css[cssid] != NULL); +} + +bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno) +{ + if (!channel_subsys->css[cssid]) { + return false; + } + if (!channel_subsys->css[cssid]->sch_set[ssid]) { + return false; + } + + return !!test_bit(devno, + channel_subsys->css[cssid]->sch_set[ssid]->devnos_used); +} + +void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + uint16_t devno, SubchDev *sch) +{ + CssImage *css; + SubchSet *s_set; + + trace_css_assign_subch(sch ? "assign" : "deassign", cssid, ssid, schid, + devno); + if (!channel_subsys->css[cssid]) { + fprintf(stderr, + "Suspicious call to %s (%x.%x.%04x) for non-existing css!\n", + __func__, cssid, ssid, schid); + return; + } + css = channel_subsys->css[cssid]; + + if (!css->sch_set[ssid]) { + css->sch_set[ssid] = g_malloc0(sizeof(SubchSet)); + } + s_set = css->sch_set[ssid]; + + s_set->sch[schid] = sch; + if (sch) { + set_bit(schid, s_set->schids_used); + set_bit(devno, s_set->devnos_used); + } else { + clear_bit(schid, s_set->schids_used); + clear_bit(devno, s_set->devnos_used); + } +} + +void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid) +{ + CrwContainer *crw_cont; + + trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : ""); + /* TODO: Maybe use a static crw pool? */ + crw_cont = g_try_malloc0(sizeof(CrwContainer)); + if (!crw_cont) { + channel_subsys->crws_lost = true; + return; + } + crw_cont->crw.flags = (rsc << 8) | erc; + if (chain) { + crw_cont->crw.flags |= CRW_FLAGS_MASK_C; + } + crw_cont->crw.rsid = rsid; + if (channel_subsys->crws_lost) { + crw_cont->crw.flags |= CRW_FLAGS_MASK_R; + channel_subsys->crws_lost = false; + } + + QTAILQ_INSERT_TAIL(&channel_subsys->pending_crws, crw_cont, sibling); + + if (channel_subsys->do_crw_mchk) { + S390CPU *cpu = s390_cpu_addr2state(0); + + channel_subsys->do_crw_mchk = false; + /* Inject crw pending machine check. */ + s390_crw_mchk(cpu); + } +} + +void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add) +{ + uint8_t guest_cssid; + bool chain_crw; + + if (add && !hotplugged) { + return; + } + if (channel_subsys->max_cssid == 0) { + /* Default cssid shows up as 0. */ + guest_cssid = (cssid == channel_subsys->default_cssid) ? 0 : cssid; + } else { + /* Show real cssid to the guest. */ + guest_cssid = cssid; + } + /* + * Only notify for higher subchannel sets/channel subsystems if the + * guest has enabled it. + */ + if ((ssid > channel_subsys->max_ssid) || + (guest_cssid > channel_subsys->max_cssid) || + ((channel_subsys->max_cssid == 0) && + (cssid != channel_subsys->default_cssid))) { + return; + } + chain_crw = (channel_subsys->max_ssid > 0) || + (channel_subsys->max_cssid > 0); + css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, chain_crw ? 
1 : 0, schid); + if (chain_crw) { + css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0, + (guest_cssid << 8) | (ssid << 4)); + } +} + +void css_generate_chp_crws(uint8_t cssid, uint8_t chpid) +{ + /* TODO */ +} + +int css_enable_mcsse(void) +{ + trace_css_enable_facility("mcsse"); + channel_subsys->max_cssid = MAX_CSSID; + return 0; +} + +int css_enable_mss(void) +{ + trace_css_enable_facility("mss"); + channel_subsys->max_ssid = MAX_SSID; + return 0; +} + +static void css_init(void) +{ + channel_subsys = g_malloc0(sizeof(*channel_subsys)); + QTAILQ_INIT(&channel_subsys->pending_crws); + channel_subsys->do_crw_mchk = true; + channel_subsys->crws_lost = false; + channel_subsys->chnmon_active = false; +} +machine_init(css_init); + +void css_reset_sch(SubchDev *sch) +{ + PMCW *p = &sch->curr_status.pmcw; + + p->intparm = 0; + p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | + PMCW_FLAGS_MASK_MP | PMCW_FLAGS_MASK_TF); + p->flags |= PMCW_FLAGS_MASK_DNV; + p->devno = sch->devno; + p->pim = 0x80; + p->lpm = p->pim; + p->pnom = 0; + p->lpum = 0; + p->mbi = 0; + p->pom = 0xff; + p->pam = 0x80; + p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_XMWME | + PMCW_CHARS_MASK_CSENSE); + + memset(&sch->curr_status.scsw, 0, sizeof(sch->curr_status.scsw)); + sch->curr_status.mba = 0; + + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; +} + +void css_reset(void) +{ + CrwContainer *crw_cont; + + /* Clean up monitoring. */ + channel_subsys->chnmon_active = false; + channel_subsys->chnmon_area = 0; + + /* Clear pending CRWs. */ + while ((crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws))) { + QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling); + g_free(crw_cont); + } + channel_subsys->do_crw_mchk = true; + channel_subsys->crws_lost = false; + + /* Reset maximum ids. */ + channel_subsys->max_cssid = 0; + channel_subsys->max_ssid = 0; +} diff --git a/hw/s390x/css.h b/hw/s390x/css.h new file mode 100644 index 0000000..85ed05d --- /dev/null +++ b/hw/s390x/css.h @@ -0,0 +1,99 @@ +/* + * Channel subsystem structures and definitions. + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef CSS_H +#define CSS_H + +#include "ioinst.h" + +/* Channel subsystem constants. */ +#define MAX_SCHID 65535 +#define MAX_SSID 3 +#define MAX_CSSID 254 /* 255 is reserved */ +#define MAX_CHPID 255 + +#define MAX_CIWS 62 + +typedef struct CIW { + uint8_t type; + uint8_t command; + uint16_t count; +} QEMU_PACKED CIW; + +typedef struct SenseId { + /* common part */ + uint8_t reserved; /* always 0x'FF' */ + uint16_t cu_type; /* control unit type */ + uint8_t cu_model; /* control unit model */ + uint16_t dev_type; /* device type */ + uint8_t dev_model; /* device model */ + uint8_t unused; /* padding byte */ + /* extended part */ + CIW ciw[MAX_CIWS]; /* variable # of CIWs */ +} QEMU_PACKED SenseId; + +/* Channel measurements, from linux/drivers/s390/cio/cmf.c. 
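CMB below is the basic (format 0) measurement block, CMBE the extended (format 1) block selected via PMCW_CHARS_MASK_MBFC.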
*/ +typedef struct CMB { + uint16_t ssch_rsch_count; + uint16_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t reserved[2]; +} QEMU_PACKED CMB; + +typedef struct CMBE { + uint32_t ssch_rsch_count; + uint32_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t device_busy_time; + uint32_t initial_command_response_time; + uint32_t reserved[7]; +} QEMU_PACKED CMBE; + +struct SubchDev { + /* channel-subsystem related things: */ + uint8_t cssid; + uint8_t ssid; + uint16_t schid; + uint16_t devno; + SCHIB curr_status; + uint8_t sense_data[32]; + hwaddr channel_prog; + CCW1 last_cmd; + bool last_cmd_valid; + ORB *orb; + /* transport-provided data: */ + int (*ccw_cb) (SubchDev *, CCW1); + SenseId id; + void *driver_data; +}; + +typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +int css_create_css_image(uint8_t cssid, bool default_image); +bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno); +void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + uint16_t devno, SubchDev *sch); +void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type); +void css_reset(void); +void css_reset_sch(SubchDev *sch); +void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid); +void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add); +void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); +#endif diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 7cbbf99..86e8415 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -159,7 +159,7 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data) dc->no_user = 1; } -static TypeInfo s390_ipl_info = { +static const TypeInfo s390_ipl_info = { .class_init = s390_ipl_class_init, .parent = TYPE_SYS_BUS_DEVICE, .name = "s390-ipl", diff --git a/hw/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c index b5d1f2b..32f63b0 100644 --- a/hw/s390-virtio-bus.c +++ b/hw/s390x/s390-virtio-bus.c @@ -17,12 +17,12 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "hw.h" +#include "hw/hw.h" #include "block/block.h" #include "sysemu/sysemu.h" -#include "boards.h" +#include "hw/boards.h" #include "monitor/monitor.h" -#include "loader.h" +#include "hw/loader.h" #include "elf.h" #include "hw/virtio.h" #include "hw/virtio-rng.h" @@ -31,7 +31,7 @@ #include "hw/sysbus.h" #include "sysemu/kvm.h" -#include "hw/s390-virtio-bus.h" +#include "hw/s390x/s390-virtio-bus.h" #include "hw/virtio-bus.h" /* #define DEBUG_S390 */ @@ -508,6 +508,13 @@ static int s390_virtio_busdev_init(DeviceState *dev) return _info->init(_dev); } +static void s390_virtio_busdev_reset(DeviceState *dev) +{ + VirtIOS390Device *_dev = (VirtIOS390Device *)dev; + + virtio_reset(_dev->vdev); +} + static void virtio_s390_device_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -515,6 +522,7 @@ static void virtio_s390_device_class_init(ObjectClass *klass, void *data) dc->init = s390_virtio_busdev_init; dc->bus_type = TYPE_S390_VIRTIO_BUS; dc->unplug = qdev_simple_unplug_cb; + dc->reset = s390_virtio_busdev_reset; } static const TypeInfo virtio_s390_device_info = { diff --git a/hw/s390-virtio-bus.h b/hw/s390x/s390-virtio-bus.h index 438b37f..4aacf83 100644 --- a/hw/s390-virtio-bus.h +++ b/hw/s390x/s390-virtio-bus.h @@ -19,12 +19,12 @@ #ifndef HW_S390_VIRTIO_BUS_H #define HW_S390_VIRTIO_BUS_H 1 -#include "virtio-blk.h" -#include "virtio-net.h" -#include "virtio-rng.h" -#include "virtio-serial.h" -#include "virtio-scsi.h" -#include "virtio-bus.h" +#include "hw/virtio-blk.h" +#include "hw/virtio-net.h" +#include "hw/virtio-rng.h" +#include "hw/virtio-serial.h" +#include "hw/virtio-scsi.h" +#include "hw/virtio-bus.h" #define VIRTIO_DEV_OFFS_TYPE 0 /* 8 bits */ #define VIRTIO_DEV_OFFS_NUM_VQ 1 /* 8 bits */ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c new file mode 100644 index 0000000..6549211 --- /dev/null +++ b/hw/s390x/s390-virtio-ccw.c @@ -0,0 +1,134 @@ +/* + * virtio ccw machine + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "hw/boards.h" +#include "exec/address-spaces.h" +#include "s390-virtio.h" +#include "sclp.h" +#include "ioinst.h" +#include "css.h" +#include "virtio-ccw.h" + +static int virtio_ccw_hcall_notify(const uint64_t *args) +{ + uint64_t subch_id = args[0]; + uint64_t queue = args[1]; + SubchDev *sch; + int cssid, ssid, schid, m; + + if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) { + return -EINVAL; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (!sch || !css_subch_visible(sch)) { + return -EINVAL; + } + virtio_queue_notify(virtio_ccw_get_vdev(sch), queue); + return 0; + +} + +static int virtio_ccw_hcall_early_printk(const uint64_t *args) +{ + uint64_t mem = args[0]; + + if (mem < ram_size) { + /* Early printk */ + return 0; + } + return -EINVAL; +} + +static void virtio_ccw_register_hcalls(void) +{ + s390_register_virtio_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, + virtio_ccw_hcall_notify); + /* Tolerate early printk. 
*/ + s390_register_virtio_hypercall(KVM_S390_VIRTIO_NOTIFY, + virtio_ccw_hcall_early_printk); +} + +static void ccw_init(QEMUMachineInitArgs *args) +{ + ram_addr_t my_ram_size = args->ram_size; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *ram = g_new(MemoryRegion, 1); + int shift = 0; + uint8_t *storage_keys; + int ret; + VirtualCssBus *css_bus; + + /* s390x ram size detection needs a 16bit multiplier + an increment. So + guests > 64GB can be specified in 2MB steps etc. */ + while ((my_ram_size >> (20 + shift)) > 65535) { + shift++; + } + my_ram_size = my_ram_size >> (20 + shift) << (20 + shift); + + /* let's propagate the changed ram size into the global variable. */ + ram_size = my_ram_size; + + /* get a BUS */ + css_bus = virtual_css_bus_init(); + s390_sclp_init(); + s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline, + args->initrd_filename); + + /* register hypercalls */ + virtio_ccw_register_hcalls(); + + /* allocate RAM */ + memory_region_init_ram(ram, "s390.ram", my_ram_size); + vmstate_register_ram_global(ram); + memory_region_add_subregion(sysmem, 0, ram); + + /* allocate storage keys */ + storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE); + + /* init CPUs */ + s390_init_cpus(args->cpu_model, storage_keys); + + if (kvm_enabled()) { + kvm_s390_enable_css_support(s390_cpu_addr2state(0)); + } + /* + * Create virtual css and set it as default so that non mcss-e + * enabled guests only see virtio devices. + */ + ret = css_create_css_image(VIRTUAL_CSSID, true); + assert(ret == 0); + + /* Create VirtIO network adapters */ + s390_create_virtio_net(BUS(css_bus), "virtio-net-ccw"); +} + +static QEMUMachine ccw_machine = { + .name = "s390-ccw-virtio", + .alias = "s390-ccw", + .desc = "VirtIO-ccw based S390 machine", + .init = ccw_init, + .block_default_type = IF_VIRTIO, + .no_cdrom = 1, + .no_floppy = 1, + .no_serial = 1, + .no_parallel = 1, + .no_sdcard = 1, + .use_sclp = 1, + .max_cpus = 255, + DEFAULT_MACHINE_OPTIONS, +}; + +static void ccw_machine_init(void) +{ + qemu_register_machine(&ccw_machine); +} + +machine_init(ccw_machine_init) diff --git a/hw/s390x/s390-virtio-hcall.c b/hw/s390x/s390-virtio-hcall.c index d7938c0..ee62649 100644 --- a/hw/s390x/s390-virtio-hcall.c +++ b/hw/s390x/s390-virtio-hcall.c @@ -10,7 +10,7 @@ */ #include "cpu.h" -#include "hw/s390-virtio.h" +#include "hw/s390x/s390-virtio.h" #define MAX_DIAG_SUBCODES 255 diff --git a/hw/s390-virtio.c b/hw/s390x/s390-virtio.c index 5edaabb..2a1d9ac 100644 --- a/hw/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -21,22 +21,22 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "hw.h" +#include "hw/hw.h" #include "block/block.h" #include "sysemu/blockdev.h" #include "sysemu/sysemu.h" #include "net/net.h" -#include "boards.h" +#include "hw/boards.h" #include "monitor/monitor.h" -#include "loader.h" +#include "hw/loader.h" #include "hw/virtio.h" #include "hw/sysbus.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" -#include "hw/s390-virtio-bus.h" +#include "hw/s390x/s390-virtio-bus.h" #include "hw/s390x/sclp.h" -#include "hw/s390-virtio.h" +#include "hw/s390x/s390-virtio.h" //#define DEBUG_S390 @@ -86,6 +86,9 @@ static int s390_virtio_hcall_reset(const uint64_t *args) VirtIOS390Device *dev; dev = s390_virtio_bus_find_mem(s390_bus, mem); + if (dev == NULL) { + return -EINVAL; + } virtio_reset(dev->vdev); stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0); s390_virtio_device_sync(dev); @@ -147,13 +150,73 @@ unsigned s390_del_running_cpu(CPUS390XState *env) return s390_running_cpus; } +void s390_init_ipl_dev(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename) +{ + DeviceState *dev; + + dev = qdev_create(NULL, "s390-ipl"); + if (kernel_filename) { + qdev_prop_set_string(dev, "kernel", kernel_filename); + } + if (initrd_filename) { + qdev_prop_set_string(dev, "initrd", initrd_filename); + } + qdev_prop_set_string(dev, "cmdline", kernel_cmdline); + qdev_init_nofail(dev); +} + +void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys) +{ + int i; + + if (cpu_model == NULL) { + cpu_model = "host"; + } + + ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); + + for (i = 0; i < smp_cpus; i++) { + S390CPU *cpu; + + cpu = cpu_s390x_init(cpu_model); + + ipi_states[i] = cpu; + cpu->env.halted = 1; + cpu->env.exception_index = EXCP_HLT; + cpu->env.storage_keys = storage_keys; + } +} + + +void s390_create_virtio_net(BusState *bus, const char *name) +{ + int i; + + for (i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + DeviceState *dev; + + if (!nd->model) { + nd->model = g_strdup("virtio"); + } + + if (strcmp(nd->model, "virtio")) { + fprintf(stderr, "S390 only supports VirtIO nics\n"); + exit(1); + } + + dev = qdev_create(bus, name); + qdev_set_nic_properties(dev, nd); + qdev_init_nofail(dev); + } +} + /* PC hardware initialisation */ static void s390_init(QEMUMachineInitArgs *args) { ram_addr_t my_ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - CPUS390XState *env = NULL; - DeviceState *dev; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); int shift = 0; @@ -161,7 +224,6 @@ static void s390_init(QEMUMachineInitArgs *args) void *virtio_region; hwaddr virtio_region_len; hwaddr virtio_region_start; - int i; /* s390x ram size detection needs a 16bit multiplier + an increment. So guests > 64GB can be specified in 2MB steps etc. 
*/ @@ -176,15 +238,8 @@ static void s390_init(QEMUMachineInitArgs *args) /* get a BUS */ s390_bus = s390_virtio_bus_init(&my_ram_size); s390_sclp_init(); - dev = qdev_create(NULL, "s390-ipl"); - if (args->kernel_filename) { - qdev_prop_set_string(dev, "kernel", args->kernel_filename); - } - if (args->initrd_filename) { - qdev_prop_set_string(dev, "initrd", args->initrd_filename); - } - qdev_prop_set_string(dev, "cmdline", args->kernel_cmdline); - qdev_init_nofail(dev); + s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline, + args->initrd_filename); /* register hypercalls */ s390_virtio_register_hcalls(); @@ -207,46 +262,10 @@ static void s390_init(QEMUMachineInitArgs *args) storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE); /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = "host"; - } - - ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); - - for (i = 0; i < smp_cpus; i++) { - S390CPU *cpu; - CPUS390XState *tmp_env; - - cpu = cpu_s390x_init(cpu_model); - tmp_env = &cpu->env; - if (!env) { - env = tmp_env; - } - ipi_states[i] = cpu; - tmp_env->halted = 1; - tmp_env->exception_index = EXCP_HLT; - tmp_env->storage_keys = storage_keys; - } - + s390_init_cpus(args->cpu_model, storage_keys); /* Create VirtIO network adapters */ - for(i = 0; i < nb_nics; i++) { - NICInfo *nd = &nd_table[i]; - DeviceState *dev; - - if (!nd->model) { - nd->model = g_strdup("virtio"); - } - - if (strcmp(nd->model, "virtio")) { - fprintf(stderr, "S390 only supports VirtIO nics\n"); - exit(1); - } - - dev = qdev_create((BusState *)s390_bus, "virtio-net-s390"); - qdev_set_nic_properties(dev, nd); - qdev_init_nofail(dev); - } + s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390"); } static QEMUMachine s390_machine = { diff --git a/hw/s390-virtio.h b/hw/s390x/s390-virtio.h index 25bb610..a6c4c19 100644 --- a/hw/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -15,8 +15,14 @@ #define KVM_S390_VIRTIO_NOTIFY 0 #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 typedef int (*s390_virtio_fn)(const uint64_t *args); void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn); +void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys); +void s390_init_ipl_dev(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename); +void s390_create_virtio_net(BusState *bus, const char *name); #endif diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c new file mode 100644 index 0000000..231f81e --- /dev/null +++ b/hw/s390x/virtio-ccw.c @@ -0,0 +1,960 @@ +/* + * virtio ccw target implementation + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ */ + +#include "hw/hw.h" +#include "block/block.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "monitor/monitor.h" +#include "hw/virtio.h" +#include "hw/virtio-serial.h" +#include "hw/virtio-net.h" +#include "hw/sysbus.h" +#include "qemu/bitops.h" +#include "hw/virtio-bus.h" + +#include "ioinst.h" +#include "css.h" +#include "virtio-ccw.h" +#include "trace.h" + +static int virtual_css_bus_reset(BusState *qbus) +{ + /* This should actually be modelled via the generic css */ + css_reset(); + + /* we don't traverse ourselves, return 0 */ + return 0; +} + + +static void virtual_css_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + k->reset = virtual_css_bus_reset; +} + +static const TypeInfo virtual_css_bus_info = { + .name = TYPE_VIRTUAL_CSS_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(VirtualCssBus), + .class_init = virtual_css_bus_class_init, +}; + +static const VirtIOBindings virtio_ccw_bindings; + +VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch) +{ + VirtIODevice *vdev = NULL; + + if (sch->driver_data) { + vdev = ((VirtioCcwDevice *)sch->driver_data)->vdev; + } + return vdev; +} + +VirtualCssBus *virtual_css_bus_init(void) +{ + VirtualCssBus *cbus; + BusState *bus; + DeviceState *dev; + + /* Create bridge device */ + dev = qdev_create(NULL, "virtual-css-bridge"); + qdev_init_nofail(dev); + + /* Create bus on bridge device */ + bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); + cbus = VIRTUAL_CSS_BUS(bus); + + /* Enable hotplugging */ + bus->allow_hotplug = 1; + + return cbus; +} + +/* Communication blocks used by several channel commands. */ +typedef struct VqInfoBlock { + uint64_t queue; + uint32_t align; + uint16_t index; + uint16_t num; +} QEMU_PACKED VqInfoBlock; + +typedef struct VqConfigBlock { + uint16_t index; + uint16_t num_max; +} QEMU_PACKED VqConfigBlock; + +typedef struct VirtioFeatDesc { + uint32_t features; + uint8_t index; +} QEMU_PACKED VirtioFeatDesc; + +/* Specify where the virtqueues for the subchannel are in guest memory. */ +static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, + uint16_t index, uint16_t num) +{ + VirtioCcwDevice *dev = sch->driver_data; + + if (index >= VIRTIO_PCI_QUEUE_MAX) { + return -EINVAL; + } + + /* Current code in virtio.c relies on 4K alignment. */ + if (addr && (align != 4096)) { + return -EINVAL; + } + + if (!dev) { + return -EINVAL; + } + + virtio_queue_set_addr(dev->vdev, index, addr); + if (!addr) { + virtio_queue_set_vector(dev->vdev, index, 0); + } else { + /* Fail if we don't have a big enough queue. */ + /* TODO: Add interface to handle vring.num changing */ + if (virtio_queue_get_num(dev->vdev, index) > num) { + return -EINVAL; + } + virtio_queue_set_vector(dev->vdev, index, index); + } + /* tell notify handler in case of config change */ + dev->vdev->config_vector = VIRTIO_PCI_QUEUE_MAX; + return 0; +} + +static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) +{ + int ret; + VqInfoBlock info; + uint8_t status; + VirtioFeatDesc features; + void *config; + hwaddr indicators; + VqConfigBlock vq_config; + VirtioCcwDevice *dev = sch->driver_data; + bool check_len; + int len; + hwaddr hw_len; + + if (!dev) { + return -EINVAL; + } + + trace_virtio_ccw_interpret_ccw(sch->cssid, sch->ssid, sch->schid, + ccw.cmd_code); + check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); + + /* Look at the command. 
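This dispatches the virtio-ccw specific command codes (SET_VQ, the FEAT/CONF/STATUS/IND commands and READ_VQ_CONF); basic commands like SENSE_ID have already been handled by the common interpreter in css.c.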
*/ + switch (ccw.cmd_code) { + case CCW_CMD_SET_VQ: + if (check_len) { + if (ccw.count != sizeof(info)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(info)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + info.queue = ldq_phys(ccw.cda); + info.align = ldl_phys(ccw.cda + sizeof(info.queue)); + info.index = lduw_phys(ccw.cda + sizeof(info.queue) + + sizeof(info.align)); + info.num = lduw_phys(ccw.cda + sizeof(info.queue) + + sizeof(info.align) + + sizeof(info.index)); + ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index, + info.num); + sch->curr_status.scsw.count = 0; + } + break; + case CCW_CMD_VDEV_RESET: + virtio_reset(dev->vdev); + ret = 0; + break; + case CCW_CMD_READ_FEAT: + if (check_len) { + if (ccw.count != sizeof(features)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(features)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + features.index = ldub_phys(ccw.cda + sizeof(features.features)); + if (features.index < ARRAY_SIZE(dev->host_features)) { + features.features = dev->host_features[features.index]; + } else { + /* Return zeroes if the guest supports more feature bits. */ + features.features = 0; + } + stl_le_phys(ccw.cda, features.features); + sch->curr_status.scsw.count = ccw.count - sizeof(features); + ret = 0; + } + break; + case CCW_CMD_WRITE_FEAT: + if (check_len) { + if (ccw.count != sizeof(features)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(features)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + features.index = ldub_phys(ccw.cda + sizeof(features.features)); + features.features = ldl_le_phys(ccw.cda); + if (features.index < ARRAY_SIZE(dev->host_features)) { + if (dev->vdev->set_features) { + dev->vdev->set_features(dev->vdev, features.features); + } + dev->vdev->guest_features = features.features; + } else { + /* + * If the guest supports more feature bits, assert that it + * passes us zeroes for those we don't support. + */ + if (features.features) { + fprintf(stderr, "Guest bug: features[%i]=%x (expected 0)\n", + features.index, features.features); + /* XXX: do a unit check here? 
*/ + } + } + sch->curr_status.scsw.count = ccw.count - sizeof(features); + ret = 0; + } + break; + case CCW_CMD_READ_CONF: + if (check_len) { + if (ccw.count > dev->vdev->config_len) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, dev->vdev->config_len); + if (!ccw.cda) { + ret = -EFAULT; + } else { + dev->vdev->get_config(dev->vdev, dev->vdev->config); + /* XXX config space endianness */ + cpu_physical_memory_write(ccw.cda, dev->vdev->config, len); + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + } + break; + case CCW_CMD_WRITE_CONF: + if (check_len) { + if (ccw.count > dev->vdev->config_len) { + ret = -EINVAL; + break; + } + } + len = MIN(ccw.count, dev->vdev->config_len); + hw_len = len; + if (!ccw.cda) { + ret = -EFAULT; + } else { + config = cpu_physical_memory_map(ccw.cda, &hw_len, 0); + if (!config) { + ret = -EFAULT; + } else { + len = hw_len; + /* XXX config space endianness */ + memcpy(dev->vdev->config, config, len); + cpu_physical_memory_unmap(config, hw_len, 0, hw_len); + if (dev->vdev->set_config) { + dev->vdev->set_config(dev->vdev, dev->vdev->config); + } + sch->curr_status.scsw.count = ccw.count - len; + ret = 0; + } + } + break; + case CCW_CMD_WRITE_STATUS: + if (check_len) { + if (ccw.count != sizeof(status)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(status)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + status = ldub_phys(ccw.cda); + virtio_set_status(dev->vdev, status); + if (dev->vdev->status == 0) { + virtio_reset(dev->vdev); + } + sch->curr_status.scsw.count = ccw.count - sizeof(status); + ret = 0; + } + break; + case CCW_CMD_SET_IND: + if (check_len) { + if (ccw.count != sizeof(indicators)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(indicators)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + indicators = ldq_phys(ccw.cda); + if (!indicators) { + ret = -EFAULT; + } else { + dev->indicators = indicators; + sch->curr_status.scsw.count = ccw.count - sizeof(indicators); + ret = 0; + } + break; + case CCW_CMD_SET_CONF_IND: + if (check_len) { + if (ccw.count != sizeof(indicators)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(indicators)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + indicators = ldq_phys(ccw.cda); + if (!indicators) { + ret = -EFAULT; + } else { + dev->indicators2 = indicators; + sch->curr_status.scsw.count = ccw.count - sizeof(indicators); + ret = 0; + } + break; + case CCW_CMD_READ_VQ_CONF: + if (check_len) { + if (ccw.count != sizeof(vq_config)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(vq_config)) { + /* Can't execute command. 
*/ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + vq_config.index = lduw_phys(ccw.cda); + vq_config.num_max = virtio_queue_get_num(dev->vdev, + vq_config.index); + stw_phys(ccw.cda + sizeof(vq_config.index), vq_config.num_max); + sch->curr_status.scsw.count = ccw.count - sizeof(vq_config); + ret = 0; + } + break; + default: + ret = -ENOSYS; + break; + } + return ret; +} + +static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) +{ + unsigned int cssid = 0; + unsigned int ssid = 0; + unsigned int schid; + unsigned int devno; + bool have_devno = false; + bool found = false; + SubchDev *sch; + int ret; + int num; + DeviceState *parent = DEVICE(dev); + + sch = g_malloc0(sizeof(SubchDev)); + + sch->driver_data = dev; + dev->sch = sch; + + dev->vdev = vdev; + dev->indicators = 0; + + /* Initialize subchannel structure. */ + sch->channel_prog = 0x0; + sch->last_cmd_valid = false; + sch->orb = NULL; + /* + * Use a device number if provided. Otherwise, fall back to subchannel + * number. + */ + if (dev->bus_id) { + num = sscanf(dev->bus_id, "%x.%x.%04x", &cssid, &ssid, &devno); + if (num == 3) { + if ((cssid > MAX_CSSID) || (ssid > MAX_SSID)) { + ret = -EINVAL; + error_report("Invalid cssid or ssid: cssid %x, ssid %x", + cssid, ssid); + goto out_err; + } + /* Enforce use of virtual cssid. */ + if (cssid != VIRTUAL_CSSID) { + ret = -EINVAL; + error_report("cssid %x not valid for virtio devices", cssid); + goto out_err; + } + if (css_devno_used(cssid, ssid, devno)) { + ret = -EEXIST; + error_report("Device %x.%x.%04x already exists", cssid, ssid, + devno); + goto out_err; + } + sch->cssid = cssid; + sch->ssid = ssid; + sch->devno = devno; + have_devno = true; + } else { + ret = -EINVAL; + error_report("Malformed devno parameter '%s'", dev->bus_id); + goto out_err; + } + } + + /* Find the next free id. */ + if (have_devno) { + for (schid = 0; schid <= MAX_SCHID; schid++) { + if (!css_find_subch(1, cssid, ssid, schid)) { + sch->schid = schid; + css_subch_assign(cssid, ssid, schid, devno, sch); + found = true; + break; + } + } + if (!found) { + ret = -ENODEV; + error_report("No free subchannel found for %x.%x.%04x", cssid, ssid, + devno); + goto out_err; + } + trace_virtio_ccw_new_device(cssid, ssid, schid, devno, + "user-configured"); + } else { + cssid = VIRTUAL_CSSID; + for (ssid = 0; ssid <= MAX_SSID; ssid++) { + for (schid = 0; schid <= MAX_SCHID; schid++) { + if (!css_find_subch(1, cssid, ssid, schid)) { + sch->cssid = cssid; + sch->ssid = ssid; + sch->schid = schid; + devno = schid; + /* + * If the devno is already taken, look further in this + * subchannel set. + */ + while (css_devno_used(cssid, ssid, devno)) { + if (devno == MAX_SCHID) { + devno = 0; + } else if (devno == schid - 1) { + ret = -ENODEV; + error_report("No free devno found"); + goto out_err; + } else { + devno++; + } + } + sch->devno = devno; + css_subch_assign(cssid, ssid, schid, devno, sch); + found = true; + break; + } + } + if (found) { + break; + } + } + if (!found) { + ret = -ENODEV; + error_report("Virtual channel subsystem is full!"); + goto out_err; + } + trace_virtio_ccw_new_device(cssid, ssid, schid, devno, + "auto-configured"); + } + + /* Build initial schib. */ + css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE); + + sch->ccw_cb = virtio_ccw_cb; + + /* Build senseid data. 
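The control unit type marks the subchannel as a virtio-ccw device and the model carries the virtio device id, which is what the guest driver binds on.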
*/ + memset(&sch->id, 0, sizeof(SenseId)); + sch->id.reserved = 0xff; + sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->id.cu_model = dev->vdev->device_id; + + virtio_bind_device(vdev, &virtio_ccw_bindings, DEVICE(dev)); + /* Only the first 32 feature bits are used. */ + dev->host_features[0] = vdev->get_features(vdev, dev->host_features[0]); + dev->host_features[0] |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY; + dev->host_features[0] |= 0x1 << VIRTIO_F_BAD_FEATURE; + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, + parent->hotplugged, 1); + return 0; + +out_err: + dev->sch = NULL; + g_free(sch); + return ret; +} + +static int virtio_ccw_exit(VirtioCcwDevice *dev) +{ + SubchDev *sch = dev->sch; + + if (sch) { + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + g_free(sch); + } + dev->indicators = 0; + return 0; +} + +static int virtio_ccw_net_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_net_exit(VirtioCcwDevice *dev) +{ + virtio_net_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_blk_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_blk_init((DeviceState *)dev, &dev->blk); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_blk_exit(VirtioCcwDevice *dev) +{ + virtio_blk_exit(dev->vdev); + blockdev_mark_auto_del(dev->blk.conf.bs); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_serial_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_serial_init((DeviceState *)dev, &dev->serial); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_serial_exit(VirtioCcwDevice *dev) +{ + virtio_serial_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_balloon_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_balloon_init((DeviceState *)dev); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_balloon_exit(VirtioCcwDevice *dev) +{ + virtio_balloon_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +static int virtio_ccw_scsi_init(VirtioCcwDevice *dev) +{ + VirtIODevice *vdev; + + vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi); + if (!vdev) { + return -1; + } + + return virtio_ccw_device_init(dev, vdev); +} + +static int virtio_ccw_scsi_exit(VirtioCcwDevice *dev) +{ + virtio_scsi_exit(dev->vdev); + return virtio_ccw_exit(dev); +} + +/* DeviceState to VirtioCcwDevice. Note: used on datapath, + * be careful and test performance if you change this. 
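+ * (container_of() is plain pointer arithmetic, while the checked QOM cast would perform a type lookup on every notification.)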
+ */ +static inline VirtioCcwDevice *to_virtio_ccw_dev_fast(DeviceState *d) +{ + return container_of(d, VirtioCcwDevice, parent_obj); +} + +static void virtio_ccw_notify(DeviceState *d, uint16_t vector) +{ + VirtioCcwDevice *dev = to_virtio_ccw_dev_fast(d); + SubchDev *sch = dev->sch; + uint64_t indicators; + + if (vector >= 128) { + return; + } + + if (vector < VIRTIO_PCI_QUEUE_MAX) { + indicators = ldq_phys(dev->indicators); + indicators |= 1ULL << vector; + stq_phys(dev->indicators, indicators); + } else { + vector = 0; + indicators = ldq_phys(dev->indicators2); + indicators |= 1ULL << vector; + stq_phys(dev->indicators2, indicators); + } + + css_conditional_io_interrupt(sch); + +} + +static unsigned virtio_ccw_get_features(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + /* Only the first 32 feature bits are used. */ + return dev->host_features[0]; +} + +static void virtio_ccw_reset(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + virtio_reset(dev->vdev); + css_reset_sch(dev->sch); +} + +/**************** Virtio-ccw Bus Device Descriptions *******************/ + +static const VirtIOBindings virtio_ccw_bindings = { + .notify = virtio_ccw_notify, + .get_features = virtio_ccw_get_features, +}; + +static Property virtio_ccw_net_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_NET_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_NIC_PROPERTIES(VirtioCcwDevice, nic), + DEFINE_PROP_UINT32("x-txtimer", VirtioCcwDevice, + net.txtimer, TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtioCcwDevice, + net.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtioCcwDevice, net.tx), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_net_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_net_init; + k->exit = virtio_ccw_net_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_net_properties; +} + +static const TypeInfo virtio_ccw_net = { + .name = "virtio-net-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_net_class_init, +}; + +static Property virtio_ccw_blk_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_BLOCK_PROPERTIES(VirtioCcwDevice, blk.conf), + DEFINE_PROP_STRING("serial", VirtioCcwDevice, blk.serial), +#ifdef __linux__ + DEFINE_PROP_BIT("scsi", VirtioCcwDevice, blk.scsi, 0, true), +#endif + DEFINE_VIRTIO_BLK_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_blk_init; + k->exit = virtio_ccw_blk_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_blk_properties; +} + +static const TypeInfo virtio_ccw_blk = { + .name = "virtio-blk-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_blk_class_init, +}; + +static Property virtio_ccw_serial_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_PROP_UINT32("max_ports", VirtioCcwDevice, + serial.max_virtserial_ports, 31), + DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_serial_class_init(ObjectClass *klass, void *data) +{ + 
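/* Sketch of the indicator update performed by virtio_ccw_notify() above:
 * vectors below the queue limit set bit <vector> in the guest's primary
 * indicators; anything else is folded to bit 0 of the secondary
 * (configuration-change) indicators.  Plain variables stand in for the
 * guest memory the real code touches with ldq_phys()/stq_phys(), and
 * QUEUE_MAX is an assumed limit standing in for VIRTIO_PCI_QUEUE_MAX. */
#include <stdint.h>
#include <stdio.h>

#define QUEUE_MAX 64

static void notify(uint64_t *ind, uint64_t *ind2, unsigned int vector)
{
    if (vector >= 128) {
        return;                        /* out of range: drop */
    }
    if (vector < QUEUE_MAX) {
        *ind  |= 1ULL << vector;       /* queue notification */
    } else {
        *ind2 |= 1ULL << 0;            /* configuration change */
    }
}

int main(void)
{
    uint64_t ind = 0, ind2 = 0;

    notify(&ind, &ind2, 3);
    notify(&ind, &ind2, 127);
    printf("ind=%016llx ind2=%016llx\n",
           (unsigned long long)ind, (unsigned long long)ind2);
    return 0;
}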
DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_serial_init; + k->exit = virtio_ccw_serial_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_serial_properties; +} + +static const TypeInfo virtio_ccw_serial = { + .name = "virtio-serial-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_serial_class_init, +}; + +static Property virtio_ccw_balloon_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_balloon_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_balloon_init; + k->exit = virtio_ccw_balloon_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_balloon_properties; +} + +static const TypeInfo virtio_ccw_balloon = { + .name = "virtio-balloon-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_balloon_class_init, +}; + +static Property virtio_ccw_scsi_properties[] = { + DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id), + DEFINE_VIRTIO_SCSI_PROPERTIES(VirtioCcwDevice, host_features[0], scsi), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->init = virtio_ccw_scsi_init; + k->exit = virtio_ccw_scsi_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_scsi_properties; +} + +static const TypeInfo virtio_ccw_scsi = { + .name = "virtio-scsi-ccw", + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_scsi_class_init, +}; + +static int virtio_ccw_busdev_init(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev); + + virtio_ccw_bus_new(&_dev->bus, _dev); + + return _info->init(_dev); +} + +static int virtio_ccw_busdev_exit(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev); + + return _info->exit(_dev); +} + +static int virtio_ccw_busdev_unplug(DeviceState *dev) +{ + VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; + SubchDev *sch = _dev->sch; + + /* + * We should arrive here only for device_del, since we don't support + * direct hot(un)plug of channels, but only through virtio. + */ + assert(sch != NULL); + /* Subchannel is now disabled and no longer valid. 
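/* The class_init functions above all follow one pattern: look up the
 * class structs and fill in the per-type hooks.  A QOM-free sketch of
 * the same idea: */
#include <stdio.h>

typedef struct DeviceClassSketch {
    int  (*init)(void);
    int  (*exit)(void);
    void (*reset)(void);
} DeviceClassSketch;

static int  serial_init_cb(void)  { puts("serial init"); return 0; }
static int  serial_exit_cb(void)  { puts("serial exit"); return 0; }
static void common_reset_cb(void) { puts("reset"); }

static void serial_class_init_sketch(DeviceClassSketch *k)
{
    k->init  = serial_init_cb;    /* type-specific hooks */
    k->exit  = serial_exit_cb;
    k->reset = common_reset_cb;   /* shared across all virtio-ccw types */
}

int main(void)
{
    DeviceClassSketch k;

    serial_class_init_sketch(&k);
    k.init();
    k.reset();
    k.exit();
    return 0;
}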
*/ + sch->curr_status.pmcw.flags &= ~(PMCW_FLAGS_MASK_ENA | + PMCW_FLAGS_MASK_DNV); + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, 1, 0); + + object_unparent(OBJECT(dev)); + qdev_free(dev); + return 0; +} + +static void virtio_ccw_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->init = virtio_ccw_busdev_init; + dc->exit = virtio_ccw_busdev_exit; + dc->unplug = virtio_ccw_busdev_unplug; + dc->bus_type = TYPE_VIRTUAL_CSS_BUS; + +} + +static const TypeInfo virtio_ccw_device_info = { + .name = TYPE_VIRTIO_CCW_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VirtioCcwDevice), + .class_init = virtio_ccw_device_class_init, + .class_size = sizeof(VirtIOCCWDeviceClass), + .abstract = true, +}; + +/***************** Virtual-css Bus Bridge Device ********************/ +/* Only required to have the virtio bus as child in the system bus */ + +static int virtual_css_bridge_init(SysBusDevice *dev) +{ + /* nothing */ + return 0; +} + +static void virtual_css_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = virtual_css_bridge_init; + dc->no_user = 1; +} + +static const TypeInfo virtual_css_bridge_info = { + .name = "virtual-css-bridge", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SysBusDevice), + .class_init = virtual_css_bridge_class_init, +}; + +/* virtio-ccw-bus */ + +void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev) +{ + DeviceState *qdev = DEVICE(dev); + BusState *qbus; + + qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_CCW_BUS, qdev, NULL); + qbus = BUS(bus); + qbus->allow_hotplug = 0; +} + +static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) +{ + VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); + BusClass *bus_class = BUS_CLASS(klass); + + bus_class->max_dev = 1; + k->notify = virtio_ccw_notify; + k->get_features = virtio_ccw_get_features; +} + +static const TypeInfo virtio_ccw_bus_info = { + .name = TYPE_VIRTIO_CCW_BUS, + .parent = TYPE_VIRTIO_BUS, + .instance_size = sizeof(VirtioCcwBusState), + .class_init = virtio_ccw_bus_class_init, +}; + +static void virtio_ccw_register(void) +{ + type_register_static(&virtio_ccw_bus_info); + type_register_static(&virtual_css_bus_info); + type_register_static(&virtio_ccw_device_info); + type_register_static(&virtio_ccw_serial); + type_register_static(&virtio_ccw_blk); + type_register_static(&virtio_ccw_net); + type_register_static(&virtio_ccw_balloon); + type_register_static(&virtio_ccw_scsi); + type_register_static(&virtual_css_bridge_info); +} + +type_init(virtio_ccw_register) diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h new file mode 100644 index 0000000..48474b3 --- /dev/null +++ b/hw/s390x/virtio-ccw.h @@ -0,0 +1,98 @@ +/* + * virtio ccw target definitions + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
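/* virtio_ccw_bus_new() above initializes a bus that is embedded in the
 * device structure rather than separately allocated.  Sketch of the
 * in-place pattern in plain C (qbus_create_inplace() is the real QOM
 * call): */
#include <stdio.h>
#include <string.h>

typedef struct BusSketch    { char name[32]; int allow_hotplug; } BusSketch;
typedef struct DeviceSketch { BusSketch bus; } DeviceSketch;

static void bus_create_inplace(BusSketch *bus, const char *type)
{
    memset(bus, 0, sizeof(*bus));
    snprintf(bus->name, sizeof(bus->name), "%s", type);
    bus->allow_hotplug = 0;   /* mirrors qbus->allow_hotplug = 0 above */
}

int main(void)
{
    DeviceSketch dev;

    bus_create_inplace(&dev.bus, "virtio-ccw-bus");
    printf("bus %s, hotplug=%d\n", dev.bus.name, dev.bus.allow_hotplug);
    return 0;
}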
+ */ + +#ifndef HW_S390X_VIRTIO_CCW_H +#define HW_S390X_VIRTIO_CCW_H + +#include <hw/virtio-blk.h> +#include <hw/virtio-net.h> +#include <hw/virtio-serial.h> +#include <hw/virtio-scsi.h> +#include <hw/virtio-bus.h> + +#define VIRTUAL_CSSID 0xfe + +#define VIRTIO_CCW_CU_TYPE 0x3832 +#define VIRTIO_CCW_CHPID_TYPE 0x32 + +#define CCW_CMD_SET_VQ 0x13 +#define CCW_CMD_VDEV_RESET 0x33 +#define CCW_CMD_READ_FEAT 0x12 +#define CCW_CMD_WRITE_FEAT 0x11 +#define CCW_CMD_READ_CONF 0x22 +#define CCW_CMD_WRITE_CONF 0x21 +#define CCW_CMD_WRITE_STATUS 0x31 +#define CCW_CMD_SET_IND 0x43 +#define CCW_CMD_SET_CONF_IND 0x53 +#define CCW_CMD_READ_VQ_CONF 0x32 + +#define TYPE_VIRTIO_CCW_DEVICE "virtio-ccw-device" +#define VIRTIO_CCW_DEVICE(obj) \ + OBJECT_CHECK(VirtioCcwDevice, (obj), TYPE_VIRTIO_CCW_DEVICE) +#define VIRTIO_CCW_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtIOCCWDeviceClass, (klass), TYPE_VIRTIO_CCW_DEVICE) +#define VIRTIO_CCW_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VirtIOCCWDeviceClass, (obj), TYPE_VIRTIO_CCW_DEVICE) + +typedef struct VirtioBusState VirtioCcwBusState; +typedef struct VirtioBusClass VirtioCcwBusClass; + +#define TYPE_VIRTIO_CCW_BUS "virtio-ccw-bus" +#define VIRTIO_CCW_BUS(obj) \ + OBJECT_CHECK(VirtioCcwBus, (obj), TYPE_VIRTIO_CCW_BUS) +#define VIRTIO_CCW_BUS_GET_CLASS(obj) \ + OBJECT_CHECK(VirtioCcwBusState, (obj), TYPE_VIRTIO_CCW_BUS) +#define VIRTIO_CCW_BUS_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtioCcwBusClass, klass, TYPE_VIRTIO_CCW_BUS) + +typedef struct VirtioCcwDevice VirtioCcwDevice; + +void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev); + +typedef struct VirtIOCCWDeviceClass { + DeviceClass parent_class; + int (*init)(VirtioCcwDevice *dev); + int (*exit)(VirtioCcwDevice *dev); +} VirtIOCCWDeviceClass; + +/* Change here if we want to support more feature bits. 
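/* The command codes defined above are dispatched in a switch from the
 * subchannel's ccw_cb hook (see the CCW_CMD_READ_VQ_CONF handler earlier
 * in this patch).  Condensed sketch of the dispatch shape, with
 * placeholder handler bodies: */
#include <errno.h>
#include <stdio.h>

#define CCW_CMD_SET_VQ       0x13
#define CCW_CMD_READ_VQ_CONF 0x32

static int handle_ccw(int cmd_code)
{
    int ret;

    switch (cmd_code) {
    case CCW_CMD_SET_VQ:
        ret = 0;               /* placeholder for the real handler */
        break;
    case CCW_CMD_READ_VQ_CONF:
        ret = 0;               /* placeholder */
        break;
    default:
        ret = -ENOSYS;         /* unknown command, as in the patch */
        break;
    }
    return ret;
}

int main(void)
{
    printf("%d %d\n", handle_ccw(CCW_CMD_SET_VQ), handle_ccw(0x99));
    return 0;
}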
*/ +#define VIRTIO_CCW_FEATURE_SIZE 1 + +struct VirtioCcwDevice { + DeviceState parent_obj; + SubchDev *sch; + VirtIODevice *vdev; + char *bus_id; + VirtIOBlkConf blk; + NICConf nic; + uint32_t host_features[VIRTIO_CCW_FEATURE_SIZE]; + virtio_serial_conf serial; + virtio_net_conf net; + VirtIOSCSIConf scsi; + VirtioBusState bus; + /* Guest provided values: */ + hwaddr indicators; + hwaddr indicators2; +}; + +/* virtual css bus type */ +typedef struct VirtualCssBus { + BusState parent_obj; +} VirtualCssBus; + +#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus" +#define VIRTUAL_CSS_BUS(obj) \ + OBJECT_CHECK(VirtualCssBus, (obj), TYPE_VIRTUAL_CSS_BUS) + +VirtualCssBus *virtual_css_bus_init(void); +void virtio_ccw_device_update_status(SubchDev *sch); +VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch); +#endif @@ -1021,6 +1021,7 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, hwdef->ecc_version); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -1665,6 +1666,7 @@ static void sun4d_hw_init(const struct sun4d_hwdef *hwdef, ram_addr_t RAM_size, "Sun4d"); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -1865,6 +1867,7 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size, "Sun4c"); fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); @@ -878,6 +878,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, (uint8_t *)&nd_table[0].macaddr); fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index 3040bc6..c0a7902 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -14,6 +14,7 @@ */ #include "qemu/iov.h" +#include "qemu/timer.h" #include "qemu-common.h" #include "virtio.h" #include "pc.h" @@ -22,6 +23,7 @@ #include "virtio-balloon.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" +#include "qapi/visitor.h" #if defined(__linux__) #include <sys/mman.h> @@ -36,6 +38,9 @@ typedef struct VirtIOBalloon uint64_t stats[VIRTIO_BALLOON_S_NR]; VirtQueueElement stats_vq_elem; size_t stats_vq_offset; + QEMUTimer *stats_timer; + int64_t stats_last_update; + int64_t stats_poll_interval; DeviceState *qdev; } VirtIOBalloon; @@ -53,6 +58,16 @@ static void balloon_page(void *addr, int deflate) #endif } +static const char *balloon_stat_names[] = { + [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in", + [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out", + [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults", + [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults", + [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", + [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", + [VIRTIO_BALLOON_S_NR] = NULL +}; + /* 
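/* The balloon_stat_names table above uses C99 designated initializers
 * keyed by the stat enum, with a NULL sentinel at VIRTIO_BALLOON_S_NR so
 * callers can iterate without knowing the count.  Pattern sketch with
 * made-up indices: */
#include <stdio.h>

enum { STAT_SWAP_IN, STAT_SWAP_OUT, STAT_NR };

static const char *stat_names[] = {
    [STAT_SWAP_IN]  = "stat-swap-in",
    [STAT_SWAP_OUT] = "stat-swap-out",
    [STAT_NR]       = NULL,       /* sentinel */
};

int main(void)
{
    for (int i = 0; stat_names[i] != NULL; i++) {
        printf("%d -> %s\n", i, stat_names[i]);
    }
    return 0;
}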
* reset_stats - Mark all items in the stats array as unset * @@ -67,6 +82,118 @@ static inline void reset_stats(VirtIOBalloon *dev) for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); } +static bool balloon_stats_supported(const VirtIOBalloon *s) +{ + return s->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ); +} + +static bool balloon_stats_enabled(const VirtIOBalloon *s) +{ + return s->stats_poll_interval > 0; +} + +static void balloon_stats_destroy_timer(VirtIOBalloon *s) +{ + if (balloon_stats_enabled(s)) { + qemu_del_timer(s->stats_timer); + qemu_free_timer(s->stats_timer); + s->stats_timer = NULL; + s->stats_poll_interval = 0; + } +} + +static void balloon_stats_change_timer(VirtIOBalloon *s, int secs) +{ + qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000); +} + +static void balloon_stats_poll_cb(void *opaque) +{ + VirtIOBalloon *s = opaque; + + if (!balloon_stats_supported(s)) { + /* re-schedule */ + balloon_stats_change_timer(s, s->stats_poll_interval); + return; + } + + virtqueue_push(s->svq, &s->stats_vq_elem, s->stats_vq_offset); + virtio_notify(&s->vdev, s->svq); +} + +static void balloon_stats_get_all(Object *obj, struct Visitor *v, + void *opaque, const char *name, Error **errp) +{ + VirtIOBalloon *s = opaque; + int i; + + if (!s->stats_last_update) { + error_setg(errp, "guest hasn't updated any stats yet"); + return; + } + + visit_start_struct(v, NULL, "guest-stats", name, 0, errp); + visit_type_int(v, &s->stats_last_update, "last-update", errp); + + visit_start_struct(v, NULL, NULL, "stats", 0, errp); + for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) { + visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i], + errp); + } + visit_end_struct(v, errp); + + visit_end_struct(v, errp); +} + +static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + VirtIOBalloon *s = opaque; + visit_type_int(v, &s->stats_poll_interval, name, errp); +} + +static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + VirtIOBalloon *s = opaque; + int64_t value; + + visit_type_int(v, &value, name, errp); + if (error_is_set(errp)) { + return; + } + + if (value < 0) { + error_setg(errp, "timer value must be greater than zero"); + return; + } + + if (value == s->stats_poll_interval) { + return; + } + + if (value == 0) { + /* timer=0 disables the timer */ + balloon_stats_destroy_timer(s); + return; + } + + if (balloon_stats_enabled(s)) { + /* timer interval change */ + s->stats_poll_interval = value; + balloon_stats_change_timer(s, value); + return; + } + + /* create a new timer */ + g_assert(s->stats_timer == NULL); + s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s); + s->stats_poll_interval = value; + balloon_stats_change_timer(s, 0); +} + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = to_virtio_balloon(vdev); @@ -107,9 +234,10 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) VirtQueueElement *elem = &s->stats_vq_elem; VirtIOBalloonStat stat; size_t offset = 0; + qemu_timeval tv; if (!virtqueue_pop(vq, elem)) { - return; + goto out; } /* Initialize the stats to get rid of any stale values. 
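/* Condensed sketch of the poll-interval setter above: negative values
 * are rejected, 0 tears the timer down, a change while running re-arms
 * it, and the first non-zero value creates the timer.  The QEMU timer
 * is faked with two variables. */
#include <stdbool.h>
#include <stdio.h>

static bool timer_created;
static long poll_interval;

static int set_poll_interval(long value)
{
    if (value < 0) {
        return -1;                  /* rejected */
    }
    if (value == poll_interval) {
        return 0;                   /* no change */
    }
    if (value == 0) {               /* disable: destroy the timer */
        timer_created = false;
        poll_interval = 0;
        return 0;
    }
    if (poll_interval > 0) {        /* interval change: just re-arm */
        poll_interval = value;
        return 0;
    }
    timer_created = true;           /* first enable: create the timer */
    poll_interval = value;
    return 0;
}

int main(void)
{
    set_poll_interval(2);
    set_poll_interval(10);
    set_poll_interval(0);
    printf("created=%d interval=%ld\n", timer_created, poll_interval);
    return 0;
}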
This is only @@ -128,6 +256,18 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) s->stats[tag] = val; } s->stats_vq_offset = offset; + + if (qemu_gettimeofday(&tv) < 0) { + fprintf(stderr, "warning: %s: failed to get time of day\n", __func__); + goto out; + } + + s->stats_last_update = tv.tv_sec; + +out: + if (balloon_stats_enabled(s)) { + balloon_stats_change_timer(s, s->stats_poll_interval); + } } static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) @@ -164,28 +304,6 @@ static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f) static void virtio_balloon_stat(void *opaque, BalloonInfo *info) { VirtIOBalloon *dev = opaque; - -#if 0 - /* Disable guest-provided stats for now. For more details please check: - * https://bugzilla.redhat.com/show_bug.cgi?id=623903 - * - * If you do enable it (which is probably not going to happen as we - * need a new command for it), remember that you also need to fill the - * appropriate members of the BalloonInfo structure so that the stats - * are returned to the client. - */ - if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) { - virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset); - virtio_notify(&dev->vdev, dev->svq); - return; - } -#endif - - /* Stats are not supported. Clear out any stale values that might - * have been set by a more featureful guest kernel. - */ - reset_stats(dev); - info->actual = ram_size - ((uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT); } @@ -255,12 +373,18 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev) s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats); - reset_stats(s); - s->qdev = dev; register_savevm(dev, "virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s); + object_property_add(OBJECT(dev), "guest-stats", "guest statistics", + balloon_stats_get_all, NULL, NULL, s, NULL); + + object_property_add(OBJECT(dev), "guest-stats-polling-interval", "int", + balloon_stats_get_poll_interval, + balloon_stats_set_poll_interval, + NULL, s, NULL); + return &s->vdev; } @@ -268,6 +392,7 @@ void virtio_balloon_exit(VirtIODevice *vdev) { VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev); + balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); unregister_savevm(s->qdev, "virtio-balloon", s); virtio_cleanup(vdev); diff --git a/hw/xilinx_ethlite.c b/hw/xilinx_ethlite.c index 2254851..11dfbc3 100644 --- a/hw/xilinx_ethlite.c +++ b/hw/xilinx_ethlite.c @@ -89,7 +89,7 @@ eth_read(void *opaque, hwaddr addr, unsigned int size) case R_RX_CTRL1: case R_RX_CTRL0: r = s->regs[addr]; - D(qemu_log("%s %x=%x\n", __func__, addr * 4, r)); + D(qemu_log("%s " TARGET_FMT_plx "=%x\n", __func__, addr * 4, r)); break; default: @@ -115,7 +115,8 @@ eth_write(void *opaque, hwaddr addr, if (addr == R_TX_CTRL1) base = 0x800 / 4; - D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value)); + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); if ((value & (CTRL_P | CTRL_S)) == CTRL_S) { qemu_send_packet(&s->nic->nc, (void *) &s->regs[base], @@ -135,12 +136,16 @@ eth_write(void *opaque, hwaddr addr, break; /* Keep these native. 
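/* The receive_stats change above reworks every early return into a goto
 * so the polling timer is always re-armed, whether or not the guest had
 * queued a stats element.  Shape of the pattern: */
#include <stdbool.h>
#include <stdio.h>

static bool stats_enabled = true;

static void rearm_timer(void) { puts("timer re-armed"); }

static void receive_stats(bool have_elem, bool have_time)
{
    if (!have_elem) {
        goto out;                             /* nothing queued */
    }
    if (!have_time) {
        fprintf(stderr, "warning: failed to get time of day\n");
        goto out;
    }
    puts("stats timestamp recorded");
out:
    if (stats_enabled) {
        rearm_timer();                        /* on every exit path */
    }
}

int main(void)
{
    receive_stats(true, true);
    receive_stats(false, true);
    return 0;
}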
*/ + case R_RX_CTRL0: + case R_RX_CTRL1: + if (!(value & CTRL_S)) { + qemu_flush_queued_packets(&s->nic->nc); + } case R_TX_LEN0: case R_TX_LEN1: case R_TX_GIE0: - case R_RX_CTRL0: - case R_RX_CTRL1: - D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value)); + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); s->regs[addr] = value; break; @@ -163,9 +168,9 @@ static const MemoryRegionOps eth_ops = { static int eth_can_rx(NetClientState *nc) { struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque; - int r; - r = !(s->regs[R_RX_CTRL0] & CTRL_S); - return r; + unsigned int rxbase = s->rxbuf * (0x800 / 4); + + return !(s->regs[rxbase + R_RX_CTRL0] & CTRL_S); } static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) @@ -182,7 +187,7 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } - D(qemu_log("%s %d rxbase=%x\n", __func__, size, rxbase)); + D(qemu_log("%s %zd rxbase=%x\n", __func__, size, rxbase)); memcpy(&s->regs[rxbase + R_RX_BUF0], buf, size); s->regs[rxbase + R_RX_CTRL0] |= CTRL_S; diff --git a/include/block/block.h b/include/block/block.h index ffd1936..5c3b911 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -309,6 +309,10 @@ int bdrv_get_flags(BlockDriverState *bs); int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, @@ -351,13 +355,12 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); -#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048 - -void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable); +struct HBitmapIter; +void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity); int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector); +void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi); int64_t bdrv_get_dirty_count(BlockDriverState *bs); void bdrv_enable_copy_on_read(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index f83ffb8..f7279b9 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -32,6 +32,7 @@ #include "qapi-types.h" #include "qapi/qmp/qerror.h" #include "monitor/monitor.h" +#include "qemu/hbitmap.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -275,8 +276,7 @@ struct BlockDriverState { bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; - unsigned long *dirty_bitmap; - int64_t dirty_count; + HBitmap *dirty_bitmap; int in_use; /* users other than guest access, eg. block migration */ QTAILQ_ENTRY(BlockDriverState) list; @@ -344,6 +344,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs: Block device to operate on. * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. 
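/* Sketch of the eth_can_rx() fix above: the ethlite core ping-pongs
 * between two receive buffers spaced 0x800 bytes apart, so the status
 * bit must be tested in the currently active buffer instead of always
 * in buffer 0.  The register index and control bit are illustrative,
 * not the device's real layout. */
#include <stdio.h>

#define R_RX_CTRL0 7          /* illustrative register index */
#define CTRL_S     (1 << 0)   /* "buffer full" status bit */

static unsigned int regs[2 * (0x800 / 4)];
static int rxbuf;             /* which buffer the device fills next */

static int can_rx(void)
{
    unsigned int rxbase = rxbuf * (0x800 / 4);

    return !(regs[rxbase + R_RX_CTRL0] & CTRL_S);
}

int main(void)
{
    regs[R_RX_CTRL0] = CTRL_S;          /* buffer 0 is full */
    rxbuf = 1;                          /* but buffer 1 is active */
    printf("can_rx=%d\n", can_rx());    /* 1: buffer 1 is still free */
    return 0;
}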
+ * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. @@ -357,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/include/qemu-common.h b/include/qemu-common.h index ca464bb..af2379f 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -68,6 +68,9 @@ #if !defined(ECANCELED) #define ECANCELED 4097 #endif +#if !defined(EMEDIUMTYPE) +#define EMEDIUMTYPE 4098 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h new file mode 100644 index 0000000..73f5d1d --- /dev/null +++ b/include/qemu/hbitmap.h @@ -0,0 +1,208 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef HBITMAP_H +#define HBITMAP_H 1 + +#include <limits.h> +#include <stdint.h> +#include <stdbool.h> +#include "bitops.h" + +typedef struct HBitmap HBitmap; +typedef struct HBitmapIter HBitmapIter; + +#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6) + +/* For 32-bit, the largest that fits in a 4 GiB address space. + * For 64-bit, the number of sectors in 1 PiB. Good luck, in + * either case... :) + */ +#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41) + +/* We need to place a sentinel in level 0 to speed up iteration. Thus, + * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The + * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE + * is an exact multiple of BITS_PER_LEVEL. + */ +#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1) + +struct HBitmapIter { + const HBitmap *hb; + + /* Copied from hb for access in the inline functions (hb is opaque). */ + int granularity; + + /* Entry offset into the last-level array of longs. */ + size_t pos; + + /* The currently-active path in the tree. Each item of cur[i] stores + * the bits (i.e. the subtrees) yet to be processed under that node. + */ + unsigned long cur[HBITMAP_LEVELS]; +}; + +/** + * hbitmap_alloc: + * @size: Number of bits in the bitmap. + * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity + * bits will be represented by a single bit. Each operation on a + * range of bits first rounds the bits to determine which group they land + * in, and then affect the entire set; iteration will only visit the first + * bit of each group. + * + * Allocate a new HBitmap. + */ +HBitmap *hbitmap_alloc(uint64_t size, int granularity); + +/** + * hbitmap_empty: + * @hb: HBitmap to operate on. + * + * Return whether the bitmap is empty. + */ +bool hbitmap_empty(const HBitmap *hb); + +/** + * hbitmap_granularity: + * @hb: HBitmap to operate on. + * + * Return the granularity of the HBitmap. 
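/* The hbitmap level constants above, in numbers: each tree level
 * consumes BITS_PER_LEVEL bits of the bit index (5 on 32-bit hosts, 6
 * on 64-bit), and one extra level always exists to hold the iteration
 * sentinel.  This prints the derived values for the current host: */
#include <limits.h>
#include <stdio.h>

#define SK_BITS_PER_LEVEL (ULONG_MAX == 0xffffffffUL ? 5 : 6)
#define SK_LOG_MAX_SIZE   (ULONG_MAX == 0xffffffffUL ? 34 : 41)
#define SK_LEVELS         ((SK_LOG_MAX_SIZE / SK_BITS_PER_LEVEL) + 1)

int main(void)
{
    printf("bits per level: %d\n", SK_BITS_PER_LEVEL);
    printf("log2 max size:  %d\n", SK_LOG_MAX_SIZE);
    printf("levels:         %d\n", SK_LEVELS);
    return 0;
}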
+ */ +int hbitmap_granularity(const HBitmap *hb); + +/** + * hbitmap_count: + * @hb: HBitmap to operate on. + * + * Return the number of bits set in the HBitmap. + */ +uint64_t hbitmap_count(const HBitmap *hb); + +/** + * hbitmap_set: + * @hb: HBitmap to operate on. + * @start: First bit to set (0-based). + * @count: Number of bits to set. + * + * Set a consecutive range of bits in an HBitmap. + */ +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_reset: + * @hb: HBitmap to operate on. + * @start: First bit to reset (0-based). + * @count: Number of bits to reset. + * + * Reset a consecutive range of bits in an HBitmap. + */ +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_get: + * @hb: HBitmap to operate on. + * @item: Bit to query (0-based). + * + * Return whether the @item-th bit in an HBitmap is set. + */ +bool hbitmap_get(const HBitmap *hb, uint64_t item); + +/** + * hbitmap_free: + * @hb: HBitmap to operate on. + * + * Free an HBitmap and all of its associated memory. + */ +void hbitmap_free(HBitmap *hb); + +/** + * hbitmap_iter_init: + * @hbi: HBitmapIter to initialize. + * @hb: HBitmap to iterate on. + * @first: First bit to visit (0-based, must be strictly less than the + * size of the bitmap). + * + * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return + * the lowest-numbered bit that is set in @hb, starting at @first. + * + * Concurrent setting of bits is acceptable, and will at worst cause the + * iteration to miss some of those bits. Resetting bits before the current + * position of the iterator is also okay. However, concurrent resetting of + * bits can lead to unexpected behavior if the iterator has not yet reached + * those bits. + */ +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); + +/* hbitmap_iter_skip_words: + * @hbi: HBitmapIter to operate on. + * + * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word. + */ +unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi); + +/** + * hbitmap_iter_next: + * @hbi: HBitmapIter to operate on. + * + * Return the next bit that is set in @hbi's associated HBitmap, + * or -1 if all remaining bits are zero. + */ +static inline int64_t hbitmap_iter_next(HBitmapIter *hbi) +{ + unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1]; + int64_t item; + + if (cur == 0) { + cur = hbitmap_iter_skip_words(hbi); + if (cur == 0) { + return -1; + } + } + + /* The next call will resume work from the next bit. */ + hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ffsl(cur) - 1; + + return item << hbi->granularity; +} + +/** + * hbitmap_iter_next_word: + * @hbi: HBitmapIter to operate on. + * @p_cur: Location where to store the next non-zero word. + * + * Return the index of the next nonzero word that is set in @hbi's + * associated HBitmap, and set *p_cur to the content of that word + * (bits before the index that was passed to hbitmap_iter_init are + * trimmed on the first call). Return -1, and set *p_cur to zero, + * if all remaining words are zero. + */ +static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur) +{ + unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1]; + + if (cur == 0) { + cur = hbitmap_iter_skip_words(hbi); + if (cur == 0) { + *p_cur = 0; + return -1; + } + } + + /* The next call will resume work from the next word. 
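/* The granularity rule documented above, in arithmetic: with
 * granularity g, bit operations affect whole groups of 2^g bits, and
 * the iterator reports only the first bit of each dirty group. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int g = 3;                          /* 8 bits per group */
    uint64_t item = 21;                 /* bit the caller sets */

    uint64_t group = item >> g;         /* bit actually stored: group 2 */
    uint64_t first = group << g;        /* what iteration yields: 16 */
    printf("item %llu -> group %llu, iterator yields %llu\n",
           (unsigned long long)item, (unsigned long long)group,
           (unsigned long long)first);
    return 0;
}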
*/ + hbi->cur[HBITMAP_LEVELS - 1] = 0; + *p_cur = cur; + return hbi->pos; +} + + +#endif diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 81c9a75..2a32be4 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -26,6 +26,7 @@ #define HOST_UTILS_H 1 #include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */ +#include <string.h> /* ffsl */ #if defined(__x86_64__) #define __HAVE_FAST_MULU64__ @@ -237,4 +238,29 @@ static inline int ctpop64(uint64_t val) #endif } +/* glibc does not provide an inline version of ffsl, so always define + * ours. We need to give it a different name, however. + */ +#ifdef __GLIBC__ +#define ffsl qemu_ffsl +#endif +static inline int ffsl(long val) +{ + if (!val) { + return 0; + } + +#if QEMU_GNUC_PREREQ(3, 4) + return __builtin_ctzl(val) + 1; +#else + if (sizeof(long) == 4) { + return ctz32(val) + 1; + } else if (sizeof(long) == 8) { + return ctz64(val) + 1; + } else { + abort(); + } +#endif +} + #endif diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 773caf9..46f2247 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -40,6 +40,8 @@ typedef struct CPUState CPUState; /** * CPUClass: + * @class_by_name: Callback to map -cpu command line model name to an + * instantiatable CPU type. * @reset: Callback to reset the #CPUState to its initial state. * * Represents a CPU family or model. @@ -49,6 +51,8 @@ typedef struct CPUClass { DeviceClass parent_class; /*< public >*/ + ObjectClass *(*class_by_name)(const char *cpu_model); + void (*reset)(CPUState *cpu); } CPUClass; @@ -89,10 +93,8 @@ struct CPUState { bool stop; bool stopped; -#if !defined(CONFIG_USER_ONLY) int kvm_fd; bool kvm_vcpu_dirty; -#endif struct KVMState *kvm_state; struct kvm_run *kvm_run; @@ -108,6 +110,17 @@ struct CPUState { void cpu_reset(CPUState *cpu); /** + * cpu_class_by_name: + * @typename: The CPU base type. + * @cpu_model: The model string without any parameters. + * + * Looks up a CPU #ObjectClass matching name @cpu_model. + * + * Returns: A #CPUClass or %NULL if not matching class is found. + */ +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model); + +/** * qemu_cpu_has_work: * @cpu: The vCPU to check. * diff --git a/include/qom/object.h b/include/qom/object.h index 8e16ea8..48e80ba 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -691,6 +691,14 @@ ObjectClass *object_class_get_parent(ObjectClass *klass); const char *object_class_get_name(ObjectClass *klass); /** + * object_class_is_abstract: + * @klass: The class to obtain the abstractness for. + * + * Returns: %true if @klass is abstract, %false otherwise. + */ +bool object_class_is_abstract(ObjectClass *klass); + +/** * object_class_by_name: * @typename: The QOM typename to obtain the class for. 
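/* The ffsl() fallback above returns the 1-based position of the least
 * significant set bit, or 0 when no bit is set.  Quick standalone check
 * of those semantics (the __builtin_ctzl() path shown here matches the
 * QEMU_GNUC_PREREQ branch): */
#include <stdio.h>

static int ffsl_sketch(long val)
{
    if (!val) {
        return 0;
    }
    return __builtin_ctzl(val) + 1;
}

int main(void)
{
    printf("%d %d %d\n", ffsl_sketch(0), ffsl_sketch(1), ffsl_sketch(0x40));
    /* prints: 0 1 7 */
    return 0;
}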
* diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 81bd817..f7f6854 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -13,9 +13,16 @@ void cpu_synchronize_all_post_init(void); void qtest_clock_warp(int64_t dest); +#ifndef CONFIG_USER_ONLY /* vl.c */ extern int smp_cores; extern int smp_threads; +#else +/* *-user doesn't have configurable SMP topology */ +#define smp_cores 1 +#define smp_threads 1 +#endif + void set_numa_modes(void); void set_cpu_log(const char *optarg); void set_cpu_log_filename(const char *optarg); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6bdd513..6e6dfb3 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -36,6 +36,7 @@ #define KVM_FEATURE_ASYNC_PF 0 #define KVM_FEATURE_STEAL_TIME 0 #define KVM_FEATURE_PV_EOI 0 +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0 #endif extern int kvm_allowed; @@ -158,7 +159,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap); int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset); #endif -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr); +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_on_sigbus(int code, void *addr); /* internal API */ @@ -195,6 +196,9 @@ int kvm_arch_init(KVMState *s); int kvm_arch_init_vcpu(CPUState *cpu); +/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu); + void kvm_arch_reset_vcpu(CPUState *cpu); int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); @@ -222,7 +222,7 @@ int kvm_init_vcpu(CPUState *cpu) DPRINTF("kvm_init_vcpu\n"); - ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index); + ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu)); if (ret < 0) { DPRINTF("kvm_create_vcpu failed\n"); goto err; @@ -2026,9 +2026,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) return 0; } -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr) +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - CPUState *cpu = ENV_GET_CPU(env); return kvm_arch_on_sigbus_vcpu(cpu, code, addr); } @@ -112,7 +112,7 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint return -ENOSYS; } -int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr) +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return 1; } diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..3a4817b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -325,6 +325,80 @@ { 'command': 'query-chardev', 'returns': ['ChardevInfo'] } ## +# @DataFormat: +# +# An enumeration of data format. +# +# @utf8: The data format is 'utf8'. +# +# @base64: The data format is 'base64'. +# +# Since: 1.4 +## +{ 'enum': 'DataFormat' + 'data': [ 'utf8', 'base64' ] } + +## +# @memchar-write: +# +# Provide writing interface for memchardev. Write data to char +# device 'memory'. +# +# @device: the name of the memory char device. +# +# @size: the size to write in bytes. +# +# @data: the source data write to memchar. +# +# @format: #optional the format of the data write to chardev 'memory', +# by default is 'utf8'. +# +# Returns: Nothing on success +# If @device is not a valid char device, DeviceNotFound +# +# Since: 1.4 +## +{ 'command': 'memchar-write', + 'data': {'device': 'str', 'size': 'int', 'data': 'str', + '*format': 'DataFormat'} } + +## +# @MemCharRead +# +# Result of QMP command memchar-read. +# +# @data: The data read from memchar as string. 
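The new kvm_arch_vcpu_id() hook declared in sysemu/kvm.h above lets each
target choose the id handed to the KVM_CREATE_VCPU ioctl, instead of
hard-wiring cpu_index.  A minimal sketch, assuming a target that simply
reuses the linear CPU index; an architecture could instead return, say,
an APIC id:

#include <stdio.h>

typedef struct CPUStateSketch { int cpu_index; } CPUStateSketch;

static unsigned long kvm_arch_vcpu_id_sketch(CPUStateSketch *cpu)
{
    return cpu->cpu_index;     /* assumed default; arch-specific in QEMU */
}

int main(void)
{
    CPUStateSketch cpu = { .cpu_index = 2 };

    printf("KVM_CREATE_VCPU id = %lu\n", kvm_arch_vcpu_id_sketch(&cpu));
    return 0;
}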
+# +# @count: The numbers of bytes read from. +# +# Since: 1.4 +## +{ 'type': 'MemCharRead', + 'data': { 'data': 'str', 'count': 'int' } } + +## +# @memchar-read: +# +# Provide read interface for memchardev. Read from the char +# device 'memory' and return the data. +# +# @device: the name of the memory char device. +# +# @size: the size to read in bytes. +# +# @format: #optional the format of the data want to read from +# memchardev, by default is 'utf8'. +# +# Returns: @MemCharRead +# If @device is not a valid memchr device, DeviceNotFound +# +# Since: 1.4 +## +{ 'command': 'memchar-read', + 'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'}, + 'returns': 'MemCharRead' } + +## # @CommandInfo: # # Information about a QMP command @@ -667,10 +741,12 @@ # # @count: number of dirty bytes according to the dirty bitmap # +# @granularity: granularity of the dirty bitmap in bytes (since 1.4) +# # Since: 1.3 ## { 'type': 'BlockDirtyInfo', - 'data': {'count': 'int'} } + 'data': {'count': 'int', 'granularity': 'int'} } ## # @BlockInfo: @@ -977,28 +1053,10 @@ # # @actual: the number of bytes the balloon currently contains # -# @mem_swapped_in: #optional number of pages swapped in within the guest -# -# @mem_swapped_out: #optional number of pages swapped out within the guest -# -# @major_page_faults: #optional number of major page faults within the guest -# -# @minor_page_faults: #optional number of minor page faults within the guest -# -# @free_mem: #optional amount of memory (in bytes) free in the guest -# -# @total_mem: #optional amount of memory (in bytes) visible to the guest -# # Since: 0.14.0 # -# Notes: all current versions of QEMU do not fill out optional information in -# this structure. ## -{ 'type': 'BalloonInfo', - 'data': {'actual': 'int', '*mem_swapped_in': 'int', - '*mem_swapped_out': 'int', '*major_page_faults': 'int', - '*minor_page_faults': 'int', '*free_mem': 'int', - '*total_mem': 'int'} } +{ 'type': 'BalloonInfo', 'data': {'actual': 'int' } } ## # @query-balloon: @@ -1634,6 +1692,14 @@ # (all the disk, only the sectors allocated in the topmost image, or # only new I/O). # +# @granularity: #optional granularity of the dirty bitmap, default is 64K +# if the image format doesn't have clusters, 4K if the clusters +# are smaller than that, else the cluster size. Must be a +# power of 2 between 512 and 64M (since 1.4). +# +# @buf-size: #optional maximum amount of data in flight from source to +# target (since 1.4). +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). 
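The default-granularity rule for drive-mirror spelled out here (and in
the qmp-commands.hx text later in this patch, which states the clamp
explicitly) reduces to a small computation.  A sketch, assuming
cluster_size == 0 means the format defines no clusters:

#include <stdint.h>
#include <stdio.h>

static uint32_t default_granularity(uint32_t cluster_size)
{
    if (cluster_size == 0) {
        return 65536;              /* no cluster size: 64K default */
    }
    if (cluster_size < 4096) {
        return 4096;               /* tiny clusters: clamp up to 4K */
    }
    if (cluster_size > 65536) {
        return 65536;              /* huge clusters: clamp down to 64K */
    }
    return cluster_size;
}

int main(void)
{
    printf("%u %u %u\n",
           default_granularity(0),                  /* 65536 */
           default_granularity(512),                /* 4096 */
           default_granularity(2 * 1024 * 1024));   /* 65536 */
    return 0;
}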
@@ -1650,7 +1716,8 @@ { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*speed': 'int', '*granularity': 'uint32', + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## diff --git a/qemu-char.c b/qemu-char.c index da1db1d..ac5d62d 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -98,6 +98,7 @@ #include "ui/qemu-spice.h" #define READ_BUF_LEN 4096 +#define CBUFF_SIZE 65536 /***********************************************************/ /* character device */ @@ -2643,6 +2644,199 @@ size_t qemu_chr_mem_osize(const CharDriverState *chr) return d->outbuf_size; } +/*********************************************************/ +/*CircularMemory chardev*/ + +typedef struct { + size_t size; + size_t prod; + size_t cons; + uint8_t *cbuf; +} CirMemCharDriver; + +static bool cirmem_chr_is_empty(const CharDriverState *chr) +{ + const CirMemCharDriver *d = chr->opaque; + + return d->cons == d->prod; +} + +static size_t qemu_chr_cirmem_count(const CharDriverState *chr) +{ + const CirMemCharDriver *d = chr->opaque; + + return (d->prod - d->cons); +} + +static int cirmem_chr_write(CharDriverState *chr, const uint8_t *buf, int len) +{ + CirMemCharDriver *d = chr->opaque; + int i; + + if (!buf || (len < 0)) { + return -1; + } + + for (i = 0; i < len; i++ ) { + /* Avoid writing the IAC information to the queue. */ + if ((unsigned char)buf[i] == IAC) { + continue; + } + + d->cbuf[d->prod++ % d->size] = buf[i]; + if ((d->prod - d->cons) > d->size) { + d->cons = d->prod - d->size; + } + } + + return 0; +} + +static int cirmem_chr_read(CharDriverState *chr, uint8_t *buf, int len) +{ + CirMemCharDriver *d = chr->opaque; + int i; + + for (i = 0; i < len && !cirmem_chr_is_empty(chr); i++) { + buf[i] = d->cbuf[d->cons++ % d->size]; + } + + return i; +} + +static void cirmem_chr_close(struct CharDriverState *chr) +{ + CirMemCharDriver *d = chr->opaque; + + g_free(d->cbuf); + g_free(d); + chr->opaque = NULL; +} + +static CharDriverState *qemu_chr_open_cirmemchr(QemuOpts *opts) +{ + CharDriverState *chr; + CirMemCharDriver *d; + + chr = g_malloc0(sizeof(CharDriverState)); + d = g_malloc(sizeof(*d)); + + d->size = qemu_opt_get_number(opts, "maxcapacity", 0); + if (d->size == 0) { + d->size = CBUFF_SIZE; + } + + /* The size must be power of 2 */ + if (d->size & (d->size - 1)) { + fprintf(stderr, "chardev: size of memory device must be power of 2\n"); + goto fail; + } + + d->prod = 0; + d->cons = 0; + d->cbuf = g_malloc0(d->size); + + chr->opaque = d; + chr->chr_write = cirmem_chr_write; + chr->chr_close = cirmem_chr_close; + + return chr; + +fail: + g_free(d); + g_free(chr); + return NULL; +} + +static bool qemu_is_chr(const CharDriverState *chr, const char *filename) +{ + return strcmp(chr->filename, filename); +} + +void qmp_memchar_write(const char *device, int64_t size, + const char *data, bool has_format, + enum DataFormat format, + Error **errp) +{ + CharDriverState *chr; + guchar *write_data; + int ret; + gsize write_count; + + chr = qemu_chr_find(device); + if (!chr) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (qemu_is_chr(chr, "memory")) { + error_setg(errp,"%s is not memory char device", device); + return; + } + + write_count = (gsize)size; + + if (has_format && (format == DATA_FORMAT_BASE64)) { + write_data = g_base64_decode(data, &write_count); + } else { + write_data = (uint8_t *)data; + 
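/* Standalone sketch of the CirMemCharDriver ring used by the memchar
 * commands below: prod and cons are free-running counters, indices are
 * reduced modulo the power-of-two size (which the open function
 * enforces, letting the compiler turn % into masking), and a full ring
 * silently drops the oldest bytes by advancing cons. */
#include <stdio.h>

#define RING_SIZE 8            /* must be a power of two */

static char   cbuf[RING_SIZE];
static size_t prod, cons;

static void ring_put(char c)
{
    cbuf[prod++ % RING_SIZE] = c;
    if (prod - cons > RING_SIZE) {
        cons = prod - RING_SIZE;       /* overwrite oldest data */
    }
}

static int ring_get(void)
{
    if (cons == prod) {
        return -1;                     /* empty */
    }
    return cbuf[cons++ % RING_SIZE];
}

int main(void)
{
    const char *msg = "hello, ring!";
    int c;

    for (; *msg; msg++) {
        ring_put(*msg);
    }
    while ((c = ring_get()) != -1) {
        putchar(c);                    /* only the last 8 bytes survive */
    }
    putchar('\n');
    return 0;
}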
} + + ret = cirmem_chr_write(chr, write_data, write_count); + + if (ret < 0) { + error_setg(errp, "Failed to write to device %s", device); + return; + } +} + +MemCharRead *qmp_memchar_read(const char *device, int64_t size, + bool has_format, enum DataFormat format, + Error **errp) +{ + CharDriverState *chr; + guchar *read_data; + MemCharRead *meminfo; + size_t count; + + chr = qemu_chr_find(device); + if (!chr) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return NULL; + } + + if (qemu_is_chr(chr, "memory")) { + error_setg(errp,"%s is not memory char device", device); + return NULL; + } + + if (size <= 0) { + error_setg(errp, "size must be greater than zero"); + return NULL; + } + + meminfo = g_malloc0(sizeof(MemCharRead)); + + count = qemu_chr_cirmem_count(chr); + if (count == 0) { + meminfo->data = g_strdup(""); + return meminfo; + } + + size = size > count ? count : size; + read_data = g_malloc0(size + 1); + + meminfo->count = cirmem_chr_read(chr, read_data, size); + + if (has_format && (format == DATA_FORMAT_BASE64)) { + meminfo->data = g_base64_encode(read_data, (size_t)meminfo->count); + } else { + meminfo->data = (char *)read_data; + } + + return meminfo; +} + QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) { char host[65], port[33], width[8], height[8]; @@ -2697,6 +2891,11 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) qemu_opt_set(opts, "path", filename); return opts; } + if (strstart(filename, "memory", &p)) { + qemu_opt_set(opts, "backend", "memory"); + qemu_opt_set(opts, "maxcapacity", p); + return opts; + } if (strstart(filename, "file:", &p)) { qemu_opt_set(opts, "backend", "file"); qemu_opt_set(opts, "path", p); @@ -2796,6 +2995,7 @@ static const struct { { .name = "udp", .open = qemu_chr_open_udp }, { .name = "msmouse", .open = qemu_chr_open_msmouse }, { .name = "vc", .open = text_console_init }, + { .name = "memory", .open = qemu_chr_open_cirmemchr }, #ifdef _WIN32 { .name = "file", .open = qemu_chr_open_win_file_out }, { .name = "pipe", .open = qemu_chr_open_win_pipe }, @@ -3055,6 +3255,9 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "debug", .type = QEMU_OPT_NUMBER, + },{ + .name = "maxcapacity", + .type = QEMU_OPT_NUMBER, }, { /* end of list */ } }, diff --git a/qemu-options.hx b/qemu-options.hx index 4e2b499..2d44137 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1736,6 +1736,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev msmouse,id=id[,mux=on|off]\n" "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n" " [,mux=on|off]\n" + "-chardev memory,id=id,maxcapacity=maxcapacity\n" "-chardev file,id=id,path=path[,mux=on|off]\n" "-chardev pipe,id=id,path=path[,mux=on|off]\n" #ifdef _WIN32 @@ -1777,6 +1778,7 @@ Backend is one of: @option{udp}, @option{msmouse}, @option{vc}, +@option{memory}, @option{file}, @option{pipe}, @option{console}, @@ -1885,6 +1887,14 @@ the console, in pixels. @option{cols} and @option{rows} specify that the console be sized to fit a text console with the given dimensions. +@item -chardev memory ,id=@var{id} ,maxcapacity=@var{maxcapacity} + +Create a circular buffer with fixed size indicated by optionally @option{maxcapacity} +which will be default 64K if it is not given. + +@option{maxcapacity} specifies the max capacity of the size of circular buffer +to create. Should be power of 2. + @item -chardev file ,id=@var{id} ,path=@var{path} Log all traffic received from the guest to a file. 
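Both memchar QMP handlers above lean on GLib for the base64 DataFormat:
g_base64_decode() sizes the incoming write and g_base64_encode() wraps
the read result.  A round-trip sketch (compile against glib-2.0):

#include <glib.h>
#include <stdio.h>

int main(void)
{
    gsize len;
    gchar  *enc = g_base64_encode((const guchar *)"abcdefgh", 8);
    guchar *dec = g_base64_decode(enc, &len);

    printf("encoded: %s\n", enc);
    printf("decoded: %.*s (%lu bytes)\n", (int)len, dec,
           (unsigned long)len);
    g_free(enc);
    g_free(dec);
    return 0;
}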
diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 0ad73f3..7a0202e 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -611,13 +611,14 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err) static void guest_fsfreeze_cleanup(void) { - int64_t ret; Error *err = NULL; if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) { - ret = qmp_guest_fsfreeze_thaw(&err); - if (ret < 0 || err) { - slog("failed to clean up frozen filesystems"); + qmp_guest_fsfreeze_thaw(&err); + if (err) { + slog("failed to clean up frozen filesystems: %s", + error_get_pretty(err)); + error_free(err); } } } @@ -934,9 +935,11 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) error_setg_errno(errp, errno, "failed to get MAC address of %s", ifa->ifa_name); + close(sock); goto error; } + close(sock); mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data; info->value->hardware_address = @@ -946,20 +949,19 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) (int) mac_addr[4], (int) mac_addr[5]); info->value->has_hardware_address = true; - close(sock); } if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { /* interface with IPv4 address */ - address_item = g_malloc0(sizeof(*address_item)); - address_item->value = g_malloc0(sizeof(*address_item->value)); p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) { error_setg_errno(errp, errno, "inet_ntop failed"); goto error; } + address_item = g_malloc0(sizeof(*address_item)); + address_item->value = g_malloc0(sizeof(*address_item->value)); address_item->value->ip_address = g_strdup(addr4); address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4; @@ -972,14 +974,14 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) } else if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) { /* interface with IPv6 address */ - address_item = g_malloc0(sizeof(*address_item)); - address_item->value = g_malloc0(sizeof(*address_item->value)); p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) { error_setg_errno(errp, errno, "inet_ntop failed"); goto error; } + address_item = g_malloc0(sizeof(*address_item)); + address_item->value = g_malloc0(sizeof(*address_item->value)); address_item->value->ip_address = g_strdup(addr6); address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6; diff --git a/qmp-commands.hx b/qmp-commands.hx index cbf1280..f58a841 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -466,6 +466,72 @@ Note: inject-nmi fails when the guest doesn't support injecting. EQMP { + .name = "memchar-write", + .args_type = "device:s,size:i,data:s,format:s?", + .mhandler.cmd_new = qmp_marshal_input_memchar_write, + }, + +SQMP +memchar-write +------------- + +Provide writing interface for CirMemCharDriver. Write data to memory +char device. + +Arguments: + +- "device": the name of the char device, must be unique (json-string) +- "size": the memory size, in bytes, should be power of 2 (json-int) +- "data": the source data write to memory (json-string) +- "format": the data format write to memory, default is + utf8. 
(json-string, optional) + - Possible values: "utf8", "base64" + +Example: + +-> { "execute": "memchar-write", + "arguments": { "device": foo, + "size": 8, + "data": "abcdefgh", + "format": "utf8" } } +<- { "return": {} } + +EQMP + + { + .name = "memchar-read", + .args_type = "device:s,size:i,format:s?", + .mhandler.cmd_new = qmp_marshal_input_memchar_read, + }, + +SQMP +memchar-read +------------- + +Provide read interface for CirMemCharDriver. Read from the char +device memory and return the data with size. + +Arguments: + +- "device": the name of the char device, must be unique (json-string) +- "size": the memory size wanted to read in bytes (refer to unencoded + size of the raw data), would adjust to the init size of the + memchar if the requested size is larger than it. (json-int) +- "format": the data format write to memchardev, default is + utf8. (json-string, optional) + - Possible values: "utf8", "base64" + +Example: + +-> { "execute": "memchar-read", + "arguments": { "device": foo, + "size": 1000, + "format": "utf8" } } +<- { "return": { "data": "data string...", "count": 1000 } } + +EQMP + + { .name = "xen-save-devices-state", .args_type = "filename:F", .mhandler.cmd_new = qmp_marshal_input_xen_save_devices_state, @@ -938,7 +1004,8 @@ EQMP { .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," - "on-source-error:s?,on-target-error:s?", + "on-source-error:s?,on-target-error:s?," + "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -962,6 +1029,9 @@ Arguments: file/device (NewImageMode, optional, default 'absolute-paths') - "speed": maximum speed of the streaming job, in bytes per second (json-int) +- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional) +- "buf_size": maximum amount of data in flight from source to target, in bytes + (json-int, default 10M) - "sync": what parts of the disk image should be copied to the destination; possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O @@ -971,6 +1041,10 @@ Arguments: - "on-target-error": the action to take on an error on the target (BlockdevOnError, default 'report') +The default value of the granularity is the image cluster size clamped +between 4096 and 65536, if the image format defines one. If the format +does not define a cluster size, the default value of the granularity +is 65536. Example: @@ -2549,13 +2623,6 @@ Make an asynchronous request for balloon info. 
When the request completes a json-object will be returned containing the following data: - "actual": current balloon value in bytes (json-int) -- "mem_swapped_in": Amount of memory swapped in bytes (json-int, optional) -- "mem_swapped_out": Amount of memory swapped out in bytes (json-int, optional) -- "major_page_faults": Number of major faults (json-int, optional) -- "minor_page_faults": Number of minor faults (json-int, optional) -- "free_mem": Total amount of free and unused memory in - bytes (json-int, optional) -- "total_mem": Total amount of available memory in bytes (json-int, optional) Example: @@ -2563,12 +2630,6 @@ Example: <- { "return":{ "actual":1073741824, - "mem_swapped_in":0, - "mem_swapped_out":0, - "major_page_faults":142, - "minor_page_faults":239245, - "free_mem":1014185984, - "total_mem":1044668416 } } @@ -34,11 +34,24 @@ static void cpu_common_reset(CPUState *cpu) { } +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) +{ + CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); + + return cc->class_by_name(cpu_model); +} + +static ObjectClass *cpu_common_class_by_name(const char *cpu_model) +{ + return NULL; +} + static void cpu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); CPUClass *k = CPU_CLASS(klass); + k->class_by_name = cpu_common_class_by_name; k->reset = cpu_common_reset; dc->no_user = 1; } diff --git a/qom/object.c b/qom/object.c index 03e6f24..e200282 100644 --- a/qom/object.c +++ b/qom/object.c @@ -501,6 +501,11 @@ ObjectClass *object_get_class(Object *obj) return obj->class; } +bool object_class_is_abstract(ObjectClass *klass) +{ + return klass->type->abstract; +} + const char *object_class_get_name(ObjectClass *klass) { return klass->type->name; diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index cbe6440..0b23f77 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -147,6 +147,7 @@ controls = [ 5: 'Enable VPID', 6: 'WBINVD exiting', 7: 'Unrestricted guest', + 9: 'Virtual interrupt delivery', 10: 'PAUSE-loop exiting', 11: 'RDRAND exiting', 12: 'Enable INVPCID', diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c index 40e9809..0ad69f0 100644 --- a/target-alpha/cpu.c +++ b/target-alpha/cpu.c @@ -96,14 +96,15 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model) } oc = object_class_by_name(cpu_model); - if (oc != NULL) { + if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL && + !object_class_is_abstract(oc)) { return oc; } for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) { if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) { oc = object_class_by_name(alpha_cpu_aliases[i].typename); - assert(oc != NULL); + assert(oc != NULL && !object_class_is_abstract(oc)); return oc; } } @@ -111,6 +112,9 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model) typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model); oc = object_class_by_name(typename); g_free(typename); + if (oc != NULL && object_class_is_abstract(oc)) { + oc = NULL; + } return oc; } @@ -244,6 +248,13 @@ static void alpha_cpu_initfn(Object *obj) env->fen = 1; } +static void alpha_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + + cc->class_by_name = alpha_cpu_class_by_name; +} + static const TypeInfo alpha_cpu_type_info = { .name = TYPE_ALPHA_CPU, .parent = TYPE_CPU, @@ -251,6 +262,7 @@ static const TypeInfo alpha_cpu_type_info = { .instance_init = alpha_cpu_initfn, .abstract = true, .class_size = sizeof(AlphaCPUClass), + .class_init = 
alpha_cpu_class_init, }; static void alpha_cpu_register_types(void) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 07588a1..d1a4c82 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -201,6 +201,22 @@ void arm_cpu_realize(ARMCPU *cpu) /* CPU models */ +static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (!cpu_model) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) || + object_class_is_abstract(oc)) { + return NULL; + } + return oc; +} + static void arm926_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -766,6 +782,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) acc->parent_reset = cc->reset; cc->reset = arm_cpu_reset; + + cc->class_by_name = arm_cpu_class_by_name; } static void cpu_register(const ARMCPUInfo *info) diff --git a/target-arm/helper.c b/target-arm/helper.c index 37c34a1..7a10fdd 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1262,12 +1262,14 @@ ARMCPU *cpu_arm_init(const char *cpu_model) { ARMCPU *cpu; CPUARMState *env; + ObjectClass *oc; static int inited = 0; - if (!object_class_by_name(cpu_model)) { + oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model); + if (!oc) { return NULL; } - cpu = ARM_CPU(object_new(cpu_model)); + cpu = ARM_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; env->cpu_model_str = cpu_model; arm_cpu_realize(cpu); diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 376d4c8..5c108e1 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -23,6 +23,8 @@ #include "cpu.h" #include "sysemu/kvm.h" +#include "sysemu/cpus.h" +#include "topology.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -45,6 +47,18 @@ #include "hw/apic_internal.h" #endif +static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) +{ + int i; + for (i = 0; i < 4; i++) { + dst[i] = vendor1 >> (8 * i); + dst[i + 4] = vendor2 >> (8 * i); + dst[i + 8] = vendor3 >> (8 * i); + } + dst[CPUID_VENDOR_SZ] = '\0'; +} + /* feature flags taken from "Intel Processor Identification and the CPUID * Instruction" and AMD's "CPUID Specification". In cases of disagreement * between feature naming conventions, aliases may be added. 
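The x86_cpu_vendor_words2str() helper added above packs the three CPUID vendor register words (returned in EBX, EDX and ECX by leaf 0) into the familiar 12-character vendor string, replacing the open-coded loops this patch deletes elsewhere. Below is a minimal standalone sketch of the conversion; it is an illustration, not code from the patch: vendor_words2str() is a local stand-in for the helper, and the hex constants mirror CPUID_VENDOR_INTEL_1/2/3 from target-i386/cpu.h.

#include <stdint.h>
#include <stdio.h>

#define CPUID_VENDOR_SZ 12

/* Pack three little-endian CPUID register words into a vendor string,
 * using the same loop as x86_cpu_vendor_words2str(). */
static void vendor_words2str(char *dst, uint32_t w1, uint32_t w2, uint32_t w3)
{
    int i;
    for (i = 0; i < 4; i++) {
        dst[i] = w1 >> (8 * i);
        dst[i + 4] = w2 >> (8 * i);
        dst[i + 8] = w3 >> (8 * i);
    }
    dst[CPUID_VENDOR_SZ] = '\0';
}

int main(void)
{
    char buf[CPUID_VENDOR_SZ + 1];

    /* 0x756e6547/0x49656e69/0x6c65746e are "Genu"/"ineI"/"ntel" in
     * EBX/EDX/ECX order (the CPUID_VENDOR_INTEL_1/2/3 values). */
    vendor_words2str(buf, 0x756e6547, 0x49656e69, 0x6c65746e);
    printf("%s\n", buf); /* prints "GenuineIntel" */
    return 0;
}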
@@ -206,22 +220,17 @@ typedef struct model_features_t { int check_cpuid = 0; int enforce_cpuid = 0; -#if defined(CONFIG_KVM) static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_STEAL_TIME) | + (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); -static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI); -#else -static uint32_t kvm_default_features = 0; -static const uint32_t kvm_pv_eoi_features = 0; -#endif -void enable_kvm_pv_eoi(void) +void disable_kvm_pv_eoi(void) { - kvm_default_features |= kvm_pv_eoi_features; + kvm_default_features &= ~(1UL << KVM_FEATURE_PV_EOI); } void host_cpuid(uint32_t function, uint32_t count, @@ -338,19 +347,17 @@ static void add_flagname_to_bitmaps(const char *flagname, } typedef struct x86_def_t { - struct x86_def_t *next; const char *name; uint32_t level; - uint32_t vendor1, vendor2, vendor3; + /* vendor is zero-terminated, 12 character ASCII string */ + char vendor[CPUID_VENDOR_SZ + 1]; int family; int model; int stepping; - int tsc_khz; uint32_t features, ext_features, ext2_features, ext3_features; uint32_t kvm_features, svm_features; uint32_t xlevel; char model_id[48]; - int vendor_override; /* Store the results of Centaur's CPUID instructions */ uint32_t ext4_features; uint32_t xlevel2; @@ -396,19 +403,13 @@ typedef struct x86_def_t { #define TCG_SVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP) -/* maintains list of cpu model definitions - */ -static x86_def_t *x86_defs = {NULL}; - -/* built-in cpu model definitions (deprecated) +/* built-in CPU model definitions */ static x86_def_t builtin_x86_defs[] = { { .name = "qemu64", .level = 4, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 6, .model = 2, .stepping = 3, @@ -425,9 +426,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "phenom", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 16, .model = 2, .stepping = 3, @@ -453,9 +452,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "core2duo", .level = 10, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 15, .stepping = 11, @@ -474,9 +471,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "kvm64", .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 15, .model = 6, .stepping = 1, @@ -500,9 +495,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "qemu32", .level = 4, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 3, .stepping = 3, @@ -513,9 +506,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "kvm32", .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 15, .model = 6, .stepping = 1, @@ -530,9 +521,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "coreduo", .level = 10, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, 
.family = 6, .model = 14, .stepping = 8, @@ -548,9 +537,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "486", .level = 1, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 4, .model = 0, .stepping = 0, @@ -560,9 +547,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium", .level = 1, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 5, .model = 4, .stepping = 3, @@ -572,9 +557,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium2", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 5, .stepping = 2, @@ -584,9 +567,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "pentium3", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 7, .stepping = 3, @@ -596,9 +577,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "athlon", .level = 2, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 6, .model = 2, .stepping = 3, @@ -612,9 +591,7 @@ static x86_def_t builtin_x86_defs[] = { .name = "n270", /* original is on level 10 */ .level = 5, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 28, .stepping = 2, @@ -633,9 +610,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Conroe", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -653,9 +628,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Penryn", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -674,9 +647,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Nehalem", .level = 2, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 2, .stepping = 3, @@ -695,9 +666,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Westmere", .level = 11, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 44, .stepping = 1, @@ -717,9 +686,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "SandyBridge", .level = 0xd, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 42, .stepping = 1, @@ -742,9 +709,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Haswell", .level = 0xd, - .vendor1 = CPUID_VENDOR_INTEL_1, - .vendor2 = CPUID_VENDOR_INTEL_2, - .vendor3 = CPUID_VENDOR_INTEL_3, + .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 60, .stepping = 1, @@ -772,9 +737,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G1", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 
6, .stepping = 1, @@ -796,9 +759,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G2", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 6, .stepping = 1, @@ -822,9 +783,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G3", .level = 5, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 15, .model = 6, .stepping = 1, @@ -850,9 +809,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G4", .level = 0xd, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 21, .model = 1, .stepping = 2, @@ -882,9 +839,7 @@ static x86_def_t builtin_x86_defs[] = { { .name = "Opteron_G5", .level = 0xd, - .vendor1 = CPUID_VENDOR_AMD_1, - .vendor2 = CPUID_VENDOR_AMD_2, - .vendor3 = CPUID_VENDOR_AMD_3, + .vendor = CPUID_VENDOR_AMD, .family = 21, .model = 2, .stepping = 0, @@ -945,9 +900,7 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def) x86_cpu_def->name = "host"; host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->vendor1 = ebx; - x86_cpu_def->vendor2 = edx; - x86_cpu_def->vendor3 = ecx; + x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx); host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); x86_cpu_def->family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); @@ -972,12 +925,9 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def) kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX); cpu_x86_fill_model_id(x86_cpu_def->model_id); - x86_cpu_def->vendor_override = 0; /* Call Centaur's CPUID instruction. */ - if (x86_cpu_def->vendor1 == CPUID_VENDOR_VIA_1 && - x86_cpu_def->vendor2 == CPUID_VENDOR_VIA_2 && - x86_cpu_def->vendor3 == CPUID_VENDOR_VIA_3) { + if (!strcmp(x86_cpu_def->vendor, CPUID_VENDOR_VIA)) { host_cpuid(0xC0000000, 0, &eax, &ebx, &ecx, &edx); eax = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); if (eax >= 0xC0000001) { @@ -1213,15 +1163,10 @@ static char *x86_cpuid_get_vendor(Object *obj, Error **errp) X86CPU *cpu = X86_CPU(obj); CPUX86State *env = &cpu->env; char *value; - int i; value = (char *)g_malloc(CPUID_VENDOR_SZ + 1); - for (i = 0; i < 4; i++) { - value[i ] = env->cpuid_vendor1 >> (8 * i); - value[i + 4] = env->cpuid_vendor2 >> (8 * i); - value[i + 8] = env->cpuid_vendor3 >> (8 * i); - } - value[CPUID_VENDOR_SZ] = '\0'; + x86_cpu_vendor_words2str(value, env->cpuid_vendor1, env->cpuid_vendor2, + env->cpuid_vendor3); return value; } @@ -1246,7 +1191,6 @@ static void x86_cpuid_set_vendor(Object *obj, const char *value, env->cpuid_vendor2 |= ((uint8_t)value[i + 4]) << (8 * i); env->cpuid_vendor3 |= ((uint8_t)value[i + 8]) << (8 * i); } - env->cpuid_vendor_override = 1; } static char *x86_cpuid_get_model_id(Object *obj, Error **errp) @@ -1320,34 +1264,50 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, void *opaque, static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name) { x86_def_t *def; + int i; - for (def = x86_defs; def; def = def->next) { - if (name && !strcmp(name, def->name)) { - break; - } + if (name == NULL) { + return -1; } - if (kvm_enabled() && name && strcmp(name, "host") == 0) { + if (kvm_enabled() && strcmp(name, "host") == 0) { kvm_cpu_fill_host(x86_cpu_def); - } else if (!def) { - return -1; - } else { - memcpy(x86_cpu_def, def, sizeof(*def)); + return 0; } - return 0; + for (i = 0; i < 
ARRAY_SIZE(builtin_x86_defs); i++) { + def = &builtin_x86_defs[i]; + if (strcmp(name, def->name) == 0) { + memcpy(x86_cpu_def, def, sizeof(*def)); + /* sysenter isn't supported in compatibility mode on AMD, + * syscall isn't supported in compatibility mode on Intel. + * Normally we advertise the actual CPU vendor, but you can + * override this using the 'vendor' property if you want to use + * KVM's sysenter/syscall emulation in compatibility mode and + * when doing cross vendor migration + */ + if (kvm_enabled()) { + uint32_t ebx = 0, ecx = 0, edx = 0; + host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx); + } + return 0; + } + } + + return -1; } /* Parse "+feature,-feature,feature=foo" CPU feature string */ -static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) +static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp) { - unsigned int i; char *featurestr; /* Single 'key=value" string being parsed */ /* Features to be added */ FeatureWordArray plus_features = { 0 }; /* Features to be removed */ FeatureWordArray minus_features = { 0 }; uint32_t numvalue; + CPUX86State *env = &cpu->env; featurestr = features ? strtok(features, ",") : NULL; @@ -1360,87 +1320,57 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) } else if ((val = strchr(featurestr, '='))) { *val = 0; val++; if (!strcmp(featurestr, "family")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xff + 0xf) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->family = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "model")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xff) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->model = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "stepping")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err || numvalue > 0xf) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->stepping = numvalue ; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "level")) { - char *err; - numvalue = strtoul(val, &err, 0); - if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; - } - x86_cpu_def->level = numvalue; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "xlevel")) { char *err; + char num[32]; + numvalue = strtoul(val, &err, 0); if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } if (numvalue < 0x80000000) { + fprintf(stderr, "xlevel value shall always be >= 0x80000000" + ", fixup will be removed in future versions\n"); numvalue += 0x80000000; } - x86_cpu_def->xlevel = numvalue; + snprintf(num, sizeof(num), "%" PRIu32, numvalue); + object_property_parse(OBJECT(cpu), num, featurestr, errp); } else if (!strcmp(featurestr, "vendor")) { - if (strlen(val) != 12) { - fprintf(stderr, "vendor string must be 12 chars long\n"); - goto error; - } - x86_cpu_def->vendor1 = 0; - x86_cpu_def->vendor2 = 0; - x86_cpu_def->vendor3 = 0; - for(i = 0; i < 4; i++) { - x86_cpu_def->vendor1 |= ((uint8_t)val[i ]) << (8 * i); - x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4]) << (8 * i); - 
x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8]) << (8 * i); - } - x86_cpu_def->vendor_override = 1; + object_property_parse(OBJECT(cpu), val, featurestr, errp); } else if (!strcmp(featurestr, "model_id")) { - pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id), - val); + object_property_parse(OBJECT(cpu), val, "model-id", errp); } else if (!strcmp(featurestr, "tsc_freq")) { int64_t tsc_freq; char *err; + char num[32]; tsc_freq = strtosz_suffix_unit(val, &err, STRTOSZ_DEFSUFFIX_B, 1000); if (tsc_freq < 0 || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } - x86_cpu_def->tsc_khz = tsc_freq / 1000; + snprintf(num, sizeof(num), "%" PRId64, tsc_freq); + object_property_parse(OBJECT(cpu), num, "tsc-frequency", errp); } else if (!strcmp(featurestr, "hv_spinlocks")) { char *err; numvalue = strtoul(val, &err, 0); if (!*val || *err) { - fprintf(stderr, "bad numerical value %s\n", val); - goto error; + error_setg(errp, "bad numerical value %s\n", val); + goto out; } hyperv_set_spinlock_retries(numvalue); } else { - fprintf(stderr, "unrecognized feature %s\n", featurestr); - goto error; + error_setg(errp, "unrecognized feature %s\n", featurestr); + goto out; } } else if (!strcmp(featurestr, "check")) { check_cpuid = 1; @@ -1451,31 +1381,34 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features) } else if (!strcmp(featurestr, "hv_vapic")) { hyperv_enable_vapic_recommended(true); } else { - fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr); - goto error; + error_setg(errp, "feature string `%s' not in format (+feature|" + "-feature|feature=xyz)\n", featurestr); + goto out; + } + if (error_is_set(errp)) { + goto out; } featurestr = strtok(NULL, ","); } - x86_cpu_def->features |= plus_features[FEAT_1_EDX]; - x86_cpu_def->ext_features |= plus_features[FEAT_1_ECX]; - x86_cpu_def->ext2_features |= plus_features[FEAT_8000_0001_EDX]; - x86_cpu_def->ext3_features |= plus_features[FEAT_8000_0001_ECX]; - x86_cpu_def->ext4_features |= plus_features[FEAT_C000_0001_EDX]; - x86_cpu_def->kvm_features |= plus_features[FEAT_KVM]; - x86_cpu_def->svm_features |= plus_features[FEAT_SVM]; - x86_cpu_def->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX]; - x86_cpu_def->features &= ~minus_features[FEAT_1_EDX]; - x86_cpu_def->ext_features &= ~minus_features[FEAT_1_ECX]; - x86_cpu_def->ext2_features &= ~minus_features[FEAT_8000_0001_EDX]; - x86_cpu_def->ext3_features &= ~minus_features[FEAT_8000_0001_ECX]; - x86_cpu_def->ext4_features &= ~minus_features[FEAT_C000_0001_EDX]; - x86_cpu_def->kvm_features &= ~minus_features[FEAT_KVM]; - x86_cpu_def->svm_features &= ~minus_features[FEAT_SVM]; - x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX]; - return 0; + env->cpuid_features |= plus_features[FEAT_1_EDX]; + env->cpuid_ext_features |= plus_features[FEAT_1_ECX]; + env->cpuid_ext2_features |= plus_features[FEAT_8000_0001_EDX]; + env->cpuid_ext3_features |= plus_features[FEAT_8000_0001_ECX]; + env->cpuid_ext4_features |= plus_features[FEAT_C000_0001_EDX]; + env->cpuid_kvm_features |= plus_features[FEAT_KVM]; + env->cpuid_svm_features |= plus_features[FEAT_SVM]; + env->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX]; + env->cpuid_features &= ~minus_features[FEAT_1_EDX]; + env->cpuid_ext_features &= ~minus_features[FEAT_1_ECX]; + env->cpuid_ext2_features &= ~minus_features[FEAT_8000_0001_EDX]; + env->cpuid_ext3_features &= 
~minus_features[FEAT_8000_0001_ECX]; + env->cpuid_ext4_features &= ~minus_features[FEAT_C000_0001_EDX]; + env->cpuid_kvm_features &= ~minus_features[FEAT_KVM]; + env->cpuid_svm_features &= ~minus_features[FEAT_SVM]; + env->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX]; -error: - return -1; +out: + return; } /* generate a composite string into buf of all cpuid names in featureset @@ -1513,8 +1446,10 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) { x86_def_t *def; char buf[256]; + int i; - for (def = x86_defs; def; def = def->next) { + for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { + def = &builtin_x86_defs[i]; snprintf(buf, sizeof(buf), "%s", def->name); (*cpu_fprintf)(f, "x86 %16s %-48s\n", buf, def->model_id); } @@ -1536,11 +1471,13 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) { CpuDefinitionInfoList *cpu_list = NULL; x86_def_t *def; + int i; - for (def = x86_defs; def; def = def->next) { + for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { CpuDefinitionInfoList *entry; CpuDefinitionInfo *info; + def = &builtin_x86_defs[i]; info = g_malloc0(sizeof(*info)); info->name = g_strdup(def->name); @@ -1602,18 +1539,12 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model) goto out; } - def->kvm_features |= kvm_default_features; + if (kvm_enabled()) { + def->kvm_features |= kvm_default_features; + } def->ext_features |= CPUID_EXT_HYPERVISOR; - if (cpu_x86_parse_featurestr(def, features) < 0) { - error_setg(&error, "Invalid cpu_model string format: %s", cpu_model); - goto out; - } - assert(def->vendor1); - env->cpuid_vendor1 = def->vendor1; - env->cpuid_vendor2 = def->vendor2; - env->cpuid_vendor3 = def->vendor3; - env->cpuid_vendor_override = def->vendor_override; + object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error); object_property_set_int(OBJECT(cpu), def->level, "level", &error); object_property_set_int(OBJECT(cpu), def->family, "family", &error); object_property_set_int(OBJECT(cpu), def->model, "model", &error); @@ -1628,11 +1559,13 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model) env->cpuid_ext4_features = def->ext4_features; env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features; env->cpuid_xlevel2 = def->xlevel2; - object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000, - "tsc-frequency", &error); object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error); + if (error) { + goto out; + } + cpu_x86_parse_featurestr(cpu, features, &error); out: g_strfreev(model_pieces); if (error) { @@ -1661,7 +1594,6 @@ void x86_cpudef_setup(void) for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) { x86_def_t *def = &builtin_x86_defs[i]; - def->next = x86_defs; /* Look for specific "cpudef" models that */ /* have the QEMU version in .model_id */ @@ -1674,8 +1606,6 @@ void x86_cpudef_setup(void) break; } } - - x86_defs = def; } } @@ -1685,16 +1615,6 @@ static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx, *ebx = env->cpuid_vendor1; *edx = env->cpuid_vendor2; *ecx = env->cpuid_vendor3; - - /* sysenter isn't supported on compatibility mode on AMD, syscall - * isn't supported in compatibility mode on Intel. - * Normally we advertise the actual cpu vendor, but you can override - * this if you want to use KVM's sysenter/syscall emulation - * in compatibility mode and when doing cross vendor migration - */ - if (kvm_enabled() && ! 
env->cpuid_vendor_override) { - host_cpuid(0, 0, NULL, ebx, ecx, edx); - } } void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, @@ -2197,6 +2117,39 @@ void x86_cpu_realize(Object *obj, Error **errp) cpu_reset(CPU(cpu)); } +/* Enables contiguous-apic-ID mode, for compatibility */ +static bool compat_apic_id_mode; + +void enable_compat_apic_id_mode(void) +{ + compat_apic_id_mode = true; +} + +/* Calculates initial APIC ID for a specific CPU index + * + * Currently we need to be able to calculate the APIC ID from the CPU index + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of + * all CPUs up to max_cpus. + */ +uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +{ + uint32_t correct_id; + static bool warned; + + correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); + if (compat_apic_id_mode) { + if (cpu_index != correct_id && !warned) { + error_report("APIC IDs set in compatibility mode, " + "CPU topology won't match the configuration"); + warned = true; + } + return cpu_index; + } else { + return correct_id; + } +} + static void x86_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); @@ -2231,7 +2184,7 @@ static void x86_cpu_initfn(Object *obj) x86_cpuid_get_tsc_freq, x86_cpuid_set_tsc_freq, NULL, NULL, NULL); - env->cpuid_apic_id = cs->cpu_index; + env->cpuid_apic_id = x86_cpu_apic_id_from_index(cs->cpu_index); /* init various static tables used in TCG mode */ if (tcg_enabled() && !inited) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 4e091cd..62508dc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -537,14 +537,14 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */ +#define CPUID_VENDOR_INTEL "GenuineIntel" #define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ #define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ #define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ +#define CPUID_VENDOR_AMD "AuthenticAMD" -#define CPUID_VENDOR_VIA_1 0x746e6543 /* "Cent" */ -#define CPUID_VENDOR_VIA_2 0x48727561 /* "aurH" */ -#define CPUID_VENDOR_VIA_3 0x736c7561 /* "auls" */ +#define CPUID_VENDOR_VIA "CentaurHauls" #define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */ @@ -835,7 +835,6 @@ typedef struct CPUX86State { uint32_t cpuid_ext2_features; uint32_t cpuid_ext3_features; uint32_t cpuid_apic_id; - int cpuid_vendor_override; /* Store the results of Centaur's CPUID instructions */ uint32_t cpuid_xlevel2; uint32_t cpuid_ext4_features; @@ -1250,9 +1249,12 @@ void do_smm_enter(CPUX86State *env1); void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); -void enable_kvm_pv_eoi(void); +void disable_kvm_pv_eoi(void); /* Return name of 32-bit register, from a R_* constant */ const char *get_register_name_32(unsigned int reg); +uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index); +void enable_compat_apic_id_mode(void); + #endif /* CPU_I386_H */ diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3acff40..9ebf181 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -411,11 +411,19 @@ static void cpu_update_state(void *opaque, int running, RunState state) } } +unsigned long kvm_arch_vcpu_id(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + return cpu->env.cpuid_apic_id; +} + +#define 
KVM_MAX_CPUID_ENTRIES 100 + int kvm_arch_init_vcpu(CPUState *cs) { struct { struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[100]; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; } QEMU_PACKED cpuid_data; X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; @@ -502,6 +510,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); for (i = 0; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported level value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; switch (i) { @@ -516,6 +528,11 @@ int kvm_arch_init_vcpu(CPUState *cs) times = c->eax & 0xff; for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:2):eax & 0xf = 0x%x\n", times); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -544,6 +561,11 @@ int kvm_arch_init_vcpu(CPUState *cs) if (i == 0xd && c->eax == 0) { continue; } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; } break; @@ -557,6 +579,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); for (i = 0x80000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; @@ -569,6 +595,10 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); for (i = 0xC0000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); + abort(); + } c = &cpuid_data.entries[cpuid_i++]; c->function = i; diff --git a/target-i386/topology.h b/target-i386/topology.h new file mode 100644 index 0000000..24ed525 --- /dev/null +++ b/target-i386/topology.h @@ -0,0 +1,136 @@ +/* + * x86 CPU topology data structures and functions + * + * Copyright (c) 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TARGET_I386_TOPOLOGY_H +#define TARGET_I386_TOPOLOGY_H + +/* This file implements the APIC-ID-based CPU topology enumeration logic, + * documented in the following document: + * Intel® 64 Architecture Processor Topology Enumeration + * http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/ + * + * This code should be compatible with AMD's "Extended Method" described at: + * AMD CPUID Specification (Publication #25481) + * Section 3: Multiple Core Calculation + * as long as: + * nr_threads is set to 1; + * OFFSET_IDX is assumed to be 0; + * CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width(). + */ + +#include <stdint.h> +#include <string.h> + +#include "qemu/bitops.h" + +/* APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support + */ +typedef uint32_t apic_id_t; + +/* Return the bit width needed for 'count' IDs + */ +static unsigned apicid_bitwidth_for_count(unsigned count) +{ + g_assert(count >= 1); + if (count == 1) { + return 0; + } + return bitops_flsl(count - 1) + 1; +} + +/* Bit width of the SMT_ID (thread ID) field on the APIC ID + */ +static inline unsigned apicid_smt_width(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_bitwidth_for_count(nr_threads); +} + +/* Bit width of the Core_ID field + */ +static inline unsigned apicid_core_width(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_bitwidth_for_count(nr_cores); +} + +/* Bit offset of the Core_ID field + */ +static inline unsigned apicid_core_offset(unsigned nr_cores, + unsigned nr_threads) +{ + return apicid_smt_width(nr_cores, nr_threads); +} + +/* Bit offset of the Pkg_ID (socket ID) field + */ +static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads) +{ + return apicid_core_offset(nr_cores, nr_threads) + + apicid_core_width(nr_cores, nr_threads); +} + +/* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID + * + * The caller must make sure core_id < nr_cores and smt_id < nr_threads. + */ +static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores, + unsigned nr_threads, + unsigned pkg_id, + unsigned core_id, + unsigned smt_id) +{ + return (pkg_id << apicid_pkg_offset(nr_cores, nr_threads)) | + (core_id << apicid_core_offset(nr_cores, nr_threads)) | + smt_id; +} + +/* Calculate thread/core/package IDs for a specific topology, + * based on (contiguous) CPU index + */ +static inline void x86_topo_ids_from_idx(unsigned nr_cores, + unsigned nr_threads, + unsigned cpu_index, + unsigned *pkg_id, + unsigned *core_id, + unsigned *smt_id) +{ + unsigned core_index = cpu_index / nr_threads; + *smt_id = cpu_index % nr_threads; + *core_id = core_index % nr_cores; + *pkg_id = core_index / nr_cores; +} + +/* Make APIC ID for the CPU 'cpu_index' + * + * 'cpu_index' is a sequential, contiguous ID for the CPU.
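+ *
+ * Worked example (illustrative, not part of the original file): with
+ * nr_cores=2 and nr_threads=2, cpu_index 5 gives smt_id=1, core_id=0
+ * and pkg_id=1, so the APIC ID is (1 << 2) | (0 << 1) | 1 = 5; with
+ * nr_cores=3 and nr_threads=1, apicid_core_width() is 2, so cpu_index 3
+ * gives pkg_id=1 and an APIC ID of 1 << 2 = 4, i.e. the IDs are no
+ * longer contiguous, which is why x86_cpu_apic_id_from_index() warns
+ * in compatibility mode.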
+ */ +static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores, + unsigned nr_threads, + unsigned cpu_index) +{ + unsigned pkg_id, core_id, smt_id; + x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index, + &pkg_id, &core_id, &smt_id); + return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id); +} + +#endif /* TARGET_I386_TOPOLOGY_H */ diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c index ce89674..5c78031 100644 --- a/target-m68k/cpu.c +++ b/target-m68k/cpu.c @@ -55,6 +55,22 @@ static void m68k_cpu_reset(CPUState *s) /* CPU models */ +static ObjectClass *m68k_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL || + object_class_is_abstract(oc))) { + return NULL; + } + return oc; +} + static void m5206_cpu_initfn(Object *obj) { M68kCPU *cpu = M68K_CPU(obj); @@ -134,6 +150,8 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) mcc->parent_reset = cc->reset; cc->reset = m68k_cpu_reset; + + cc->class_by_name = m68k_cpu_class_by_name; } static void register_cpu_type(const M68kCPUInfo *info) @@ -144,7 +162,7 @@ static void register_cpu_type(const M68kCPUInfo *info) .instance_init = info->instance_init, }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo m68k_cpu_type_info = { diff --git a/target-m68k/helper.c b/target-m68k/helper.c index 097fc78..f66e12b 100644 --- a/target-m68k/helper.c +++ b/target-m68k/helper.c @@ -97,12 +97,14 @@ CPUM68KState *cpu_m68k_init(const char *cpu_model) { M68kCPU *cpu; CPUM68KState *env; + ObjectClass *oc; static int inited; - if (object_class_by_name(cpu_model) == NULL) { + oc = cpu_class_by_name(TYPE_M68K_CPU, cpu_model); + if (oc == NULL) { return NULL; } - cpu = M68K_CPU(object_new(cpu_model)); + cpu = M68K_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; if (!inited) { diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c index 56544d8..54876d9 100644 --- a/target-openrisc/cpu.c +++ b/target-openrisc/cpu.c @@ -88,6 +88,23 @@ static void openrisc_cpu_initfn(Object *obj) } /* CPU models */ + +static ObjectClass *openrisc_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU) || + object_class_is_abstract(oc))) { + return NULL; + } + return oc; +} + static void or1200_initfn(Object *obj) { OpenRISCCPU *cpu = OPENRISC_CPU(obj); @@ -120,6 +137,8 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data) occ->parent_reset = cc->reset; cc->reset = openrisc_cpu_reset; + + cc->class_by_name = openrisc_cpu_class_by_name; } static void cpu_register(const OpenRISCCPUInfo *info) @@ -132,7 +151,7 @@ static void cpu_register(const OpenRISCCPUInfo *info) .class_size = sizeof(OpenRISCCPUClass), }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo openrisc_cpu_type_info = { @@ -158,11 +177,13 @@ static void openrisc_cpu_register_types(void) OpenRISCCPU *cpu_openrisc_init(const char *cpu_model) { OpenRISCCPU *cpu; + ObjectClass *oc; - if (!object_class_by_name(cpu_model)) { + oc = openrisc_cpu_class_by_name(cpu_model); + if (oc == NULL) { return NULL; } - cpu = OPENRISC_CPU(object_new(cpu_model)); + cpu = OPENRISC_CPU(object_new(object_class_get_name(oc))); cpu->env.cpu_model_str = cpu_model; 
openrisc_cpu_realize(OBJECT(cpu), NULL); @@ -170,11 +191,6 @@ OpenRISCCPU *cpu_openrisc_init(const char *cpu_model) return cpu; } -typedef struct OpenRISCCPUList { - fprintf_function cpu_fprintf; - FILE *file; -} OpenRISCCPUList; - /* Sort alphabetically by type name, except for "any". */ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b) { @@ -196,7 +212,7 @@ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b) static void openrisc_cpu_list_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; - OpenRISCCPUList *s = user_data; + CPUListState *s = user_data; (*s->cpu_fprintf)(s->file, " %s\n", object_class_get_name(oc)); @@ -204,7 +220,7 @@ static void openrisc_cpu_list_entry(gpointer data, gpointer user_data) void cpu_openrisc_list(FILE *f, fprintf_function cpu_fprintf) { - OpenRISCCPUList s = { + CPUListState s = { .file = f, .cpu_fprintf = cpu_fprintf, }; diff --git a/target-openrisc/exception_helper.c b/target-openrisc/exception_helper.c index dab4148..0c53b77 100644 --- a/target-openrisc/exception_helper.c +++ b/target-openrisc/exception_helper.c @@ -23,7 +23,7 @@ void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp) { - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); raise_exception(cpu, excp); } diff --git a/target-openrisc/fpu_helper.c b/target-openrisc/fpu_helper.c index b184d5e..4615a36 100644 --- a/target-openrisc/fpu_helper.c +++ b/target-openrisc/fpu_helper.c @@ -68,7 +68,7 @@ static inline void update_fpcsr(OpenRISCCPU *cpu) uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val) { uint64_t itofd; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); itofd = int32_to_float64(val, &cpu->env.fp_status); @@ -80,7 +80,7 @@ uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val) uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val) { uint32_t itofs; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); itofs = int32_to_float32(val, &cpu->env.fp_status); @@ -92,7 +92,7 @@ uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val) uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val) { uint64_t ftoid; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); ftoid = float32_to_int64(val, &cpu->env.fp_status); @@ -104,7 +104,7 @@ uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val) uint32_t HELPER(ftois)(CPUOpenRISCState *env, uint32_t val) { uint32_t ftois; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); set_float_exception_flags(0, &cpu->env.fp_status); ftois = float32_to_int32(val, &cpu->env.fp_status); @@ -120,7 +120,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ uint64_t result; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ result = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -131,7 +131,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ uint32_t result; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = 
openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ result = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -152,7 +152,7 @@ uint64_t helper_float_ ## name1 ## name2 ## _d(CPUOpenRISCState *env, \ { \ uint64_t result, temp, hi, lo; \ uint32_t val1, val2; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ hi = env->fpmaddhi; \ lo = env->fpmaddlo; \ set_float_exception_flags(0, &cpu->env.fp_status); \ @@ -174,7 +174,7 @@ uint32_t helper_float_ ## name1 ## name2 ## _s(CPUOpenRISCState *env, \ { \ uint64_t result, temp, hi, lo; \ uint32_t val1, val2; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ hi = cpu->env.fpmaddhi; \ lo = cpu->env.fpmaddlo; \ set_float_exception_flags(0, &cpu->env.fp_status); \ @@ -198,7 +198,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -209,7 +209,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1)\ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -227,7 +227,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -238,7 +238,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -253,7 +253,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_le(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -264,7 +264,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_le(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -278,7 +278,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float64_lt(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ @@ -289,7 +289,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ 
int res; \ - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \ + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \ set_float_exception_flags(0, &cpu->env.fp_status); \ res = !float32_lt(fdt0, fdt1, &cpu->env.fp_status); \ update_fpcsr(cpu); \ diff --git a/target-openrisc/int_helper.c b/target-openrisc/int_helper.c index 20f9837..16cb5ab 100644 --- a/target-openrisc/int_helper.c +++ b/target-openrisc/int_helper.c @@ -48,7 +48,7 @@ uint32_t HELPER(mul32)(CPUOpenRISCState *env, uint64_t result; uint32_t high, cy; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); result = (uint64_t)ra * rb; /* regisiers in or32 is 32bit, so 32 is NOT a magic number. diff --git a/target-openrisc/interrupt_helper.c b/target-openrisc/interrupt_helper.c index 79f5afe..a176441 100644 --- a/target-openrisc/interrupt_helper.c +++ b/target-openrisc/interrupt_helper.c @@ -23,7 +23,7 @@ void HELPER(rfe)(CPUOpenRISCState *env) { - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); #ifndef CONFIG_USER_ONLY int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^ (cpu->env.esr & (SR_SM | SR_IME | SR_DME)); diff --git a/target-openrisc/mmu.c b/target-openrisc/mmu.c index 8364652..d354e1f 100644 --- a/target-openrisc/mmu.c +++ b/target-openrisc/mmu.c @@ -187,7 +187,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env, int ret = 0; hwaddr physical = 0; int prot = 0; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); ret = cpu_openrisc_get_phys_addr(cpu, &physical, &prot, address, rw); @@ -209,7 +209,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env, target_ulong address, int rw, int mmu_idx) { int ret = 0; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret); ret = 1; @@ -224,7 +224,7 @@ hwaddr cpu_get_phys_page_debug(CPUOpenRISCState *env, { hwaddr phys_addr; int prot; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); if (cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr, 0)) { return -1; diff --git a/target-openrisc/sys_helper.c b/target-openrisc/sys_helper.c index f160dc3..3c5f45a 100644 --- a/target-openrisc/sys_helper.c +++ b/target-openrisc/sys_helper.c @@ -30,7 +30,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, int spr = (ra | offset); int idx; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); switch (spr) { case TO_SPR(0, 0): /* VR */ @@ -177,7 +177,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, int spr = (ra | offset); int idx; - OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); + OpenRISCCPU *cpu = openrisc_env_get_cpu(env); switch (spr) { case TO_SPR(0, 0): /* VR */ diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 2f4f068..2c64c63 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -384,6 +384,11 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) #endif /* !defined (TARGET_PPC64) */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 4f767c9..e143af5 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -10578,6 +10578,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) pcc->parent_reset = 
cc->reset; cc->reset = ppc_cpu_reset; + + cc->class_by_name = ppc_cpu_class_by_name; } static const TypeInfo ppc_cpu_type_info = { diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs index e728abf..3afb0b7 100644 --- a/target-s390x/Makefile.objs +++ b/target-s390x/Makefile.objs @@ -1,4 +1,4 @@ obj-y += translate.o helper.o cpu.o interrupt.o obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o +obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o obj-$(CONFIG_KVM) += kvm.o diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 1f2d942..9be4a47 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -50,6 +50,11 @@ #define MMU_USER_IDX 1 #define MAX_EXT_QUEUE 16 +#define MAX_IO_QUEUE 16 +#define MAX_MCHK_QUEUE 16 + +#define PSW_MCHK_MASK 0x0004000000000000 +#define PSW_IO_MASK 0x0200000000000000 typedef struct PSW { uint64_t mask; @@ -62,6 +67,17 @@ typedef struct ExtQueue { uint32_t param64; } ExtQueue; +typedef struct IOIntQueue { + uint16_t id; + uint16_t nr; + uint32_t parm; + uint32_t word; +} IOIntQueue; + +typedef struct MchkQueue { + uint16_t type; +} MchkQueue; + typedef struct CPUS390XState { uint64_t regs[16]; /* GP registers */ CPU_DoubleU fregs[16]; /* FP registers */ @@ -93,9 +109,17 @@ typedef struct CPUS390XState { uint64_t cregs[16]; /* control registers */ ExtQueue ext_queue[MAX_EXT_QUEUE]; - int pending_int; + IOIntQueue io_queue[MAX_IO_QUEUE][8]; + MchkQueue mchk_queue[MAX_MCHK_QUEUE]; + int pending_int; int ext_index; + int io_index[8]; + int mchk_index; + + uint64_t ckc; + uint64_t cputm; + uint32_t todpr; CPU_COMMON @@ -123,6 +147,9 @@ static inline void cpu_clone_regs(CPUS390XState *env, target_ulong newsp) } #endif +/* distinguish between 24 bit and 31 bit addressing */ +#define HIGH_ORDER_BIT 0x80000000 + /* Interrupt Codes */ /* Program Interrupts */ #define PGM_OPERATION 0x0001 @@ -300,8 +327,27 @@ int cpu_s390x_handle_mmu_fault (CPUS390XState *env, target_ulong address, int rw int mmu_idx); #define cpu_handle_mmu_fault cpu_s390x_handle_mmu_fault +#include "ioinst.h" #ifndef CONFIG_USER_ONLY +void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len, + int is_write); +void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len, + int is_write); +static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb) +{ + hwaddr addr = 0; + uint8_t reg; + + reg = ipb >> 28; + if (reg > 0) { + addr = env->regs[reg]; + } + addr += (ipb >> 16) & 0xfff; + + return addr; +} + void s390x_tod_timer(void *opaque); void s390x_cpu_timer(void *opaque); @@ -351,6 +397,114 @@ static inline unsigned s390_del_running_cpu(CPUS390XState *env) void cpu_lock(void); void cpu_unlock(void); +typedef struct SubchDev SubchDev; + +#ifndef CONFIG_USER_ONLY +SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +bool css_subch_visible(SubchDev *sch); +void css_conditional_io_interrupt(SubchDev *sch); +int css_do_stsch(SubchDev *sch, SCHIB *schib); +bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid); +int css_do_msch(SubchDev *sch, SCHIB *schib); +int css_do_xsch(SubchDev *sch); +int css_do_csch(SubchDev *sch); +int css_do_hsch(SubchDev *sch); +int css_do_ssch(SubchDev *sch, ORB *orb); +int css_do_tsch(SubchDev *sch, IRB *irb); +int css_do_stcrw(CRW *crw); +int css_do_tpi(IOIntCode *int_code, int lowcore); +int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid, + int rfmt, void *buf); +void css_do_schm(uint8_t 
mbk, int update, int dct, uint64_t mbo); +int css_enable_mcsse(void); +int css_enable_mss(void); +int css_do_rsch(SubchDev *sch); +int css_do_rchp(uint8_t cssid, uint8_t chpid); +bool css_present(uint8_t cssid); +#else +static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid) +{ + return NULL; +} +static inline bool css_subch_visible(SubchDev *sch) +{ + return false; +} +static inline void css_conditional_io_interrupt(SubchDev *sch) +{ +} +static inline int css_do_stsch(SubchDev *sch, SCHIB *schib) +{ + return -ENODEV; +} +static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid) +{ + return true; +} +static inline int css_do_msch(SubchDev *sch, SCHIB *schib) +{ + return -ENODEV; +} +static inline int css_do_xsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_csch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_hsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_ssch(SubchDev *sch, ORB *orb) +{ + return -ENODEV; +} +static inline int css_do_tsch(SubchDev *sch, IRB *irb) +{ + return -ENODEV; +} +static inline int css_do_stcrw(CRW *crw) +{ + return 1; +} +static inline int css_do_tpi(IOIntCode *int_code, int lowcore) +{ + return 0; +} +static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, + uint8_t l_chpid, int rfmt, void *buf) +{ + return 0; +} +static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo) +{ +} +static inline int css_enable_mss(void) +{ + return -EINVAL; +} +static inline int css_enable_mcsse(void) +{ + return -EINVAL; +} +static inline int css_do_rsch(SubchDev *sch) +{ + return -ENODEV; +} +static inline int css_do_rchp(uint8_t cssid, uint8_t chpid) +{ + return -ENODEV; +} +static inline bool css_present(uint8_t cssid) +{ + return false; +} +#endif + static inline void cpu_set_tls(CPUS390XState *env, target_ulong newtls) { env->aregs[0] = newtls >> 32; @@ -370,10 +524,14 @@ void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf); #define EXCP_EXT 1 /* external interrupt */ #define EXCP_SVC 2 /* supervisor call (syscall) */ #define EXCP_PGM 3 /* program interruption */ +#define EXCP_IO 7 /* I/O interrupt */ +#define EXCP_MCHK 8 /* machine check */ #define INTERRUPT_EXT (1 << 0) #define INTERRUPT_TOD (1 << 1) #define INTERRUPT_CPUTIMER (1 << 2) +#define INTERRUPT_IO (1 << 3) +#define INTERRUPT_MCHK (1 << 4) /* Program Status Word. */ #define S390_PSWM_REGNUM 0 @@ -836,6 +994,45 @@ static inline void cpu_inject_ext(CPUS390XState *env, uint32_t code, uint32_t pa cpu_interrupt(env, CPU_INTERRUPT_HARD); } +static inline void cpu_inject_io(CPUS390XState *env, uint16_t subchannel_id, + uint16_t subchannel_number, + uint32_t io_int_parm, uint32_t io_int_word) +{ + int isc = ffs(io_int_word << 2) - 1; + + if (env->io_index[isc] == MAX_IO_QUEUE - 1) { + /* ugh - can't queue anymore. Let's drop. */ + return; + } + + env->io_index[isc]++; + assert(env->io_index[isc] < MAX_IO_QUEUE); + + env->io_queue[env->io_index[isc]][isc].id = subchannel_id; + env->io_queue[env->io_index[isc]][isc].nr = subchannel_number; + env->io_queue[env->io_index[isc]][isc].parm = io_int_parm; + env->io_queue[env->io_index[isc]][isc].word = io_int_word; + + env->pending_int |= INTERRUPT_IO; + cpu_interrupt(env, CPU_INTERRUPT_HARD); +} + +static inline void cpu_inject_crw_mchk(CPUS390XState *env) +{ + if (env->mchk_index == MAX_MCHK_QUEUE - 1) { + /* ugh - can't queue anymore. Let's drop.
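+ * (illustrative note, not from the original patch: once mchk_index
+ * reaches MAX_MCHK_QUEUE - 1 the queue is full, so further CRW
+ * machine checks are silently discarded rather than queued)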
*/ + return; + } + + env->mchk_index++; + assert(env->mchk_index < MAX_MCHK_QUEUE); + + env->mchk_queue[env->mchk_index].type = 1; + + env->pending_int |= INTERRUPT_MCHK; + cpu_interrupt(env, CPU_INTERRUPT_HARD); +} + static inline bool cpu_has_work(CPUState *cpu) { CPUS390XState *env = &S390_CPU(cpu)->env; @@ -859,4 +1056,52 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen); void QEMU_NORETURN runtime_exception(CPUS390XState *env, int excp, uintptr_t retaddr); +#include <sysemu/kvm.h> + +#ifdef CONFIG_KVM +void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id, + uint16_t subchannel_nr, uint32_t io_int_parm, + uint32_t io_int_word); +void kvm_s390_crw_mchk(S390CPU *cpu); +void kvm_s390_enable_css_support(S390CPU *cpu); +#else +static inline void kvm_s390_io_interrupt(S390CPU *cpu, + uint16_t subchannel_id, + uint16_t subchannel_nr, + uint32_t io_int_parm, + uint32_t io_int_word) +{ +} +static inline void kvm_s390_crw_mchk(S390CPU *cpu) +{ +} +static inline void kvm_s390_enable_css_support(S390CPU *cpu) +{ +} +#endif + +static inline void s390_io_interrupt(S390CPU *cpu, + uint16_t subchannel_id, + uint16_t subchannel_nr, + uint32_t io_int_parm, + uint32_t io_int_word) +{ + if (kvm_enabled()) { + kvm_s390_io_interrupt(cpu, subchannel_id, subchannel_nr, io_int_parm, + io_int_word); + } else { + cpu_inject_io(&cpu->env, subchannel_id, subchannel_nr, io_int_parm, + io_int_word); + } +} + +static inline void s390_crw_mchk(S390CPU *cpu) +{ + if (kvm_enabled()) { + kvm_s390_crw_mchk(cpu); + } else { + cpu_inject_crw_mchk(&cpu->env); + } +} + #endif diff --git a/target-s390x/helper.c b/target-s390x/helper.c index 9a132e6..857c897 100644 --- a/target-s390x/helper.c +++ b/target-s390x/helper.c @@ -471,13 +471,56 @@ static uint64_t get_psw_mask(CPUS390XState *env) return r; } +static LowCore *cpu_map_lowcore(CPUS390XState *env) +{ + LowCore *lowcore; + hwaddr len = sizeof(LowCore); + + lowcore = cpu_physical_memory_map(env->psa, &len, 1); + + if (len < sizeof(LowCore)) { + cpu_abort(env, "Could not map lowcore\n"); + } + + return lowcore; +} + +static void cpu_unmap_lowcore(LowCore *lowcore) +{ + cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore)); +} + +void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len, + int is_write) +{ + hwaddr start = addr; + + /* Mind the prefix area. */ + if (addr < 8192) { + /* Map the lowcore. */ + start += env->psa; + *len = MIN(*len, 8192 - addr); + } else if ((addr >= env->psa) && (addr < env->psa + 8192)) { + /* Map the 0 page. 
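+ * (illustrative note, not from the original patch: this mirrors s390
+ * prefixing, under which the 8KB at absolute address 0 and the 8KB at
+ * the prefix address are swapped; guest addresses in [psa, psa + 8192)
+ * reach absolute [0, 8192), just as the lowcore branch above maps
+ * [0, 8192) onto the prefix area)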
*/ + start -= env->psa; + *len = MIN(*len, 8192 - start); + } + + return cpu_physical_memory_map(start, len, is_write); +} + +void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len, + int is_write) +{ + cpu_physical_memory_unmap(addr, len, is_write, len); +} + static void do_svc_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->svc_code = cpu_to_be16(env->int_svc_code); lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen); @@ -486,7 +529,7 @@ static void do_svc_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->svc_new_psw.mask); addr = be64_to_cpu(lowcore->svc_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); load_psw(env, mask, addr); } @@ -495,7 +538,6 @@ static void do_program_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; int ilen = env->int_pgm_ilen; switch (ilen) { @@ -513,7 +555,7 @@ static void do_program_interrupt(CPUS390XState *env) qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n", __func__, env->int_pgm_code, ilen); - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->pgm_ilen = cpu_to_be16(ilen); lowcore->pgm_code = cpu_to_be16(env->int_pgm_code); @@ -522,7 +564,7 @@ static void do_program_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->program_new_psw.mask); addr = be64_to_cpu(lowcore->program_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__, env->int_pgm_code, ilen, env->psw.mask, @@ -537,7 +579,6 @@ static void do_ext_interrupt(CPUS390XState *env) { uint64_t mask, addr; LowCore *lowcore; - hwaddr len = TARGET_PAGE_SIZE; ExtQueue *q; if (!(env->psw.mask & PSW_MASK_EXT)) { @@ -549,7 +590,7 @@ static void do_ext_interrupt(CPUS390XState *env) } q = &env->ext_queue[env->ext_index]; - lowcore = cpu_physical_memory_map(env->psa, &len, 1); + lowcore = cpu_map_lowcore(env); lowcore->ext_int_code = cpu_to_be16(q->code); lowcore->ext_params = cpu_to_be32(q->param); @@ -560,7 +601,7 @@ static void do_ext_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->external_new_psw.mask); addr = be64_to_cpu(lowcore->external_new_psw.addr); - cpu_physical_memory_unmap(lowcore, len, 1, len); + cpu_unmap_lowcore(lowcore); env->ext_index--; if (env->ext_index == -1) { @@ -573,12 +614,140 @@ static void do_ext_interrupt(CPUS390XState *env) load_psw(env, mask, addr); } +static void do_io_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + IOIntQueue *q; + uint8_t isc; + int disable = 1; + int found = 0; + + if (!(env->psw.mask & PSW_MASK_IO)) { + cpu_abort(env, "I/O int w/o I/O mask\n"); + } + + for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) { + if (env->io_index[isc] < 0) { + continue; + } + if (env->io_index[isc] > MAX_IO_QUEUE) { + cpu_abort(env, "I/O queue overrun for isc %d: %d\n", + isc, env->io_index[isc]); + } + + q = &env->io_queue[env->io_index[isc]][isc]; + if (!(env->cregs[6] & q->word)) { + disable = 0; + continue; + } + found = 1; + lowcore = cpu_map_lowcore(env); + + lowcore->subchannel_id = cpu_to_be16(q->id); + lowcore->subchannel_nr = cpu_to_be16(q->nr); + lowcore->io_int_parm = cpu_to_be32(q->parm); + lowcore->io_int_word = cpu_to_be32(q->word); + lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env)); 
+ lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->io_new_psw.mask); + addr = be64_to_cpu(lowcore->io_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + env->io_index[isc]--; + if (env->io_index[isc] >= 0) { + disable = 0; + } + break; + } + + if (disable) { + env->pending_int &= ~INTERRUPT_IO; + } + + if (found) { + DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, + env->psw.mask, env->psw.addr); + load_psw(env, mask, addr); + } +} + +static void do_mchk_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + MchkQueue *q; + int i; + + if (!(env->psw.mask & PSW_MASK_MCHECK)) { + cpu_abort(env, "Machine check w/o mchk mask\n"); + } + + if (env->mchk_index < 0 || env->mchk_index > MAX_MCHK_QUEUE) { + cpu_abort(env, "Mchk queue overrun: %d\n", env->mchk_index); + } + + q = &env->mchk_queue[env->mchk_index]; + + if (q->type != 1) { + /* Don't know how to handle this... */ + cpu_abort(env, "Unknown machine check type %d\n", q->type); + } + if (!(env->cregs[14] & (1 << 28))) { + /* CRW machine checks disabled */ + return; + } + + lowcore = cpu_map_lowcore(env); + + for (i = 0; i < 16; i++) { + lowcore->floating_pt_save_area[i] = cpu_to_be64(env->fregs[i].ll); + lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]); + lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]); + lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]); + } + lowcore->prefixreg_save_area = cpu_to_be32(env->psa); + lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc); + lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr); + lowcore->cpu_timer_save_area[0] = cpu_to_be32(env->cputm >> 32); + lowcore->cpu_timer_save_area[1] = cpu_to_be32((uint32_t)env->cputm); + lowcore->clock_comp_save_area[0] = cpu_to_be32(env->ckc >> 32); + lowcore->clock_comp_save_area[1] = cpu_to_be32((uint32_t)env->ckc); + + lowcore->mcck_interruption_code[0] = cpu_to_be32(0x00400f1d); + lowcore->mcck_interruption_code[1] = cpu_to_be32(0x40330000); + lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env)); + lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->mcck_new_psw.mask); + addr = be64_to_cpu(lowcore->mcck_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + env->mchk_index--; + if (env->mchk_index == -1) { + env->pending_int &= ~INTERRUPT_MCHK; + } + + DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, + env->psw.mask, env->psw.addr); + + load_psw(env, mask, addr); +} + void do_interrupt(CPUS390XState *env) { qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n", __func__, env->exception_index, env->psw.addr); s390_add_running_cpu(env); + /* handle machine checks */ + if ((env->psw.mask & PSW_MASK_MCHECK) && + (env->exception_index == -1)) { + if (env->pending_int & INTERRUPT_MCHK) { + env->exception_index = EXCP_MCHK; + } + } /* handle external interrupts */ if ((env->psw.mask & PSW_MASK_EXT) && env->exception_index == -1) { @@ -597,6 +766,13 @@ void do_interrupt(CPUS390XState *env) env->pending_int &= ~INTERRUPT_TOD; } } + /* handle I/O interrupts */ + if ((env->psw.mask & PSW_MASK_IO) && + (env->exception_index == -1)) { + if (env->pending_int & INTERRUPT_IO) { + env->exception_index = EXCP_IO; + } + } switch (env->exception_index) { case EXCP_PGM: @@ -608,6 +784,12 @@ void do_interrupt(CPUS390XState *env) case EXCP_EXT: do_ext_interrupt(env); break; + case EXCP_IO: + do_io_interrupt(env); + break; + case EXCP_MCHK: + do_mchk_interrupt(env); + break; } env->exception_index = -1; diff --git a/target-s390x/ioinst.c 
b/target-s390x/ioinst.c new file mode 100644 index 0000000..e3531f3 --- /dev/null +++ b/target-s390x/ioinst.c @@ -0,0 +1,761 @@ +/* + * I/O instructions for S/390 + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include <sys/types.h> + +#include "cpu.h" +#include "ioinst.h" +#include "trace.h" + +int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid) +{ + if (!IOINST_SCHID_ONE(value)) { + return -EINVAL; + } + if (!IOINST_SCHID_M(value)) { + if (IOINST_SCHID_CSSID(value)) { + return -EINVAL; + } + *cssid = 0; + *m = 0; + } else { + *cssid = IOINST_SCHID_CSSID(value); + *m = 1; + } + *ssid = IOINST_SCHID_SSID(value); + *schid = IOINST_SCHID_NR(value); + return 0; +} + +int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("xsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_xsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; +} + +int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("csch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_csch(sch); + } + if (ret == -ENODEV) { + cc = 3; + } else { + cc = 0; + } + return cc; +} + +int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("hsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_hsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; +} + +static int ioinst_schib_valid(SCHIB *schib) +{ + if ((schib->pmcw.flags & PMCW_FLAGS_MASK_INVALID) || + (schib->pmcw.chars & PMCW_CHARS_MASK_INVALID)) { + return 0; + } + /* Disallow extended measurements for now. 
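+ Accepting PMCW_CHARS_MASK_XMWME would presumably oblige us to
+ maintain the extended measurement word as well, which is not
+ implemented.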
*/ + if (schib->pmcw.chars & PMCW_CHARS_MASK_XMWME) { + return 0; + } + return 1; +} + +int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + SCHIB *schib; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*schib); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("msch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + schib = s390_cpu_physical_memory_map(env, addr, &len, 0); + if (!schib || len != sizeof(*schib)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + if (!ioinst_schib_valid(schib)) { + program_interrupt(env, PGM_OPERAND, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_msch(sch, schib); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } +out: + s390_cpu_physical_memory_unmap(env, schib, len, 0); + return cc; +} + +static void copy_orb_from_guest(ORB *dest, const ORB *src) +{ + dest->intparm = be32_to_cpu(src->intparm); + dest->ctrl0 = be16_to_cpu(src->ctrl0); + dest->lpm = src->lpm; + dest->ctrl1 = src->ctrl1; + dest->cpa = be32_to_cpu(src->cpa); +} + +static int ioinst_orb_valid(ORB *orb) +{ + if ((orb->ctrl0 & ORB_CTRL0_MASK_INVALID) || + (orb->ctrl1 & ORB_CTRL1_MASK_INVALID)) { + return 0; + } + if ((orb->cpa & HIGH_ORDER_BIT) != 0) { + return 0; + } + return 1; +} + +int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + ORB *orig_orb, orb; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*orig_orb); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("ssch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + orig_orb = s390_cpu_physical_memory_map(env, addr, &len, 0); + if (!orig_orb || len != sizeof(*orig_orb)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + copy_orb_from_guest(&orb, orig_orb); + if (!ioinst_orb_valid(&orb)) { + program_interrupt(env, PGM_OPERAND, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_ssch(sch, &orb); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + +out: + s390_cpu_physical_memory_unmap(env, orig_orb, len, 0); + return cc; +} + +int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb) +{ + CRW *crw; + uint64_t addr; + int cc; + hwaddr len = sizeof(*crw); + + addr = decode_basedisp_s(env, ipb); + crw = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!crw || len != sizeof(*crw)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + cc = css_do_stcrw(crw); + /* 0 - crw stored, 1 - zeroes stored */ +out: + s390_cpu_physical_memory_unmap(env, crw, len, 1); + return cc; +} + +int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + uint64_t addr; + int cc; + SCHIB *schib; + hwaddr len = sizeof(*schib); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 
2); + return -EIO; + } + trace_ioinst_sch_id("stsch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + schib = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!schib || len != sizeof(*schib)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch) { + if (css_subch_visible(sch)) { + css_do_stsch(sch, schib); + cc = 0; + } else { + /* Indicate no more subchannels in this css/ss */ + cc = 3; + } + } else { + if (css_schid_final(cssid, ssid, schid)) { + cc = 3; /* No more subchannels in this css/ss */ + } else { + /* Store an empty schib. */ + memset(schib, 0, sizeof(*schib)); + cc = 0; + } + } +out: + s390_cpu_physical_memory_unmap(env, schib, len, 1); + return cc; +} + +int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + IRB *irb; + uint64_t addr; + int ret = -ENODEV; + int cc; + hwaddr len = sizeof(*irb); + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("tsch", cssid, ssid, schid); + addr = decode_basedisp_s(env, ipb); + irb = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!irb || len != sizeof(*irb)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + cc = -EIO; + goto out; + } + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_tsch(sch, irb); + /* 0 - status pending, 1 - not status pending */ + cc = ret; + } else { + cc = 3; + } +out: + s390_cpu_physical_memory_unmap(env, irb, sizeof(*irb), 1); + return cc; +} + +typedef struct ChscReq { + uint16_t len; + uint16_t command; + uint32_t param0; + uint32_t param1; + uint32_t param2; +} QEMU_PACKED ChscReq; + +typedef struct ChscResp { + uint16_t len; + uint16_t code; + uint32_t param; + char data[0]; +} QEMU_PACKED ChscResp; + +#define CHSC_MIN_RESP_LEN 0x0008 + +#define CHSC_SCPD 0x0002 +#define CHSC_SCSC 0x0010 +#define CHSC_SDA 0x0031 + +#define CHSC_SCPD_0_M 0x20000000 +#define CHSC_SCPD_0_C 0x10000000 +#define CHSC_SCPD_0_FMT 0x0f000000 +#define CHSC_SCPD_0_CSSID 0x00ff0000 +#define CHSC_SCPD_0_RFMT 0x00000f00 +#define CHSC_SCPD_0_RES 0xc000f000 +#define CHSC_SCPD_1_RES 0xffffff00 +#define CHSC_SCPD_01_CHPID 0x000000ff +static void ioinst_handle_chsc_scpd(ChscReq *req, ChscResp *res) +{ + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint32_t param1 = be32_to_cpu(req->param1); + uint16_t resp_code; + int rfmt; + uint16_t cssid; + uint8_t f_chpid, l_chpid; + int desc_size; + int m; + + rfmt = (param0 & CHSC_SCPD_0_RFMT) >> 8; + if ((rfmt == 0) || (rfmt == 1)) { + rfmt = !!(param0 & CHSC_SCPD_0_C); + } + if ((len != 0x0010) || (param0 & CHSC_SCPD_0_RES) || + (param1 & CHSC_SCPD_1_RES) || req->param2) { + resp_code = 0x0003; + goto out_err; + } + if (param0 & CHSC_SCPD_0_FMT) { + resp_code = 0x0007; + goto out_err; + } + cssid = (param0 & CHSC_SCPD_0_CSSID) >> 16; + m = param0 & CHSC_SCPD_0_M; + if (cssid != 0) { + if (!m || !css_present(cssid)) { + resp_code = 0x0008; + goto out_err; + } + } + f_chpid = param0 & CHSC_SCPD_01_CHPID; + l_chpid = param1 & CHSC_SCPD_01_CHPID; + if (l_chpid < f_chpid) { + resp_code = 0x0003; + goto out_err; + } + /* css_collect_chp_desc() is endian-aware */ + desc_size = css_collect_chp_desc(m, cssid, f_chpid, l_chpid, rfmt, + &res->data); + res->code = cpu_to_be16(0x0001); + res->len = cpu_to_be16(8 + desc_size); + res->param = 
cpu_to_be32(rfmt); + return; + + out_err: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = cpu_to_be32(rfmt); +} + +#define CHSC_SCSC_0_M 0x20000000 +#define CHSC_SCSC_0_FMT 0x000f0000 +#define CHSC_SCSC_0_CSSID 0x0000ff00 +#define CHSC_SCSC_0_RES 0xdff000ff +static void ioinst_handle_chsc_scsc(ChscReq *req, ChscResp *res) +{ + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint8_t cssid; + uint16_t resp_code; + uint32_t general_chars[510]; + uint32_t chsc_chars[508]; + + if (len != 0x0010) { + resp_code = 0x0003; + goto out_err; + } + + if (param0 & CHSC_SCSC_0_FMT) { + resp_code = 0x0007; + goto out_err; + } + cssid = (param0 & CHSC_SCSC_0_CSSID) >> 8; + if (cssid != 0) { + if (!(param0 & CHSC_SCSC_0_M) || !css_present(cssid)) { + resp_code = 0x0008; + goto out_err; + } + } + if ((param0 & CHSC_SCSC_0_RES) || req->param1 || req->param2) { + resp_code = 0x0003; + goto out_err; + } + res->code = cpu_to_be16(0x0001); + res->len = cpu_to_be16(4080); + res->param = 0; + + memset(general_chars, 0, sizeof(general_chars)); + memset(chsc_chars, 0, sizeof(chsc_chars)); + + general_chars[0] = cpu_to_be32(0x03000000); + general_chars[1] = cpu_to_be32(0x00059000); + + chsc_chars[0] = cpu_to_be32(0x40000000); + chsc_chars[3] = cpu_to_be32(0x00040000); + + memcpy(res->data, general_chars, sizeof(general_chars)); + memcpy(res->data + sizeof(general_chars), chsc_chars, sizeof(chsc_chars)); + return; + + out_err: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = 0; +} + +#define CHSC_SDA_0_FMT 0x0f000000 +#define CHSC_SDA_0_OC 0x0000ffff +#define CHSC_SDA_0_RES 0xf0ff0000 +#define CHSC_SDA_OC_MCSSE 0x0 +#define CHSC_SDA_OC_MSS 0x2 +static void ioinst_handle_chsc_sda(ChscReq *req, ChscResp *res) +{ + uint16_t resp_code = 0x0001; + uint16_t len = be16_to_cpu(req->len); + uint32_t param0 = be32_to_cpu(req->param0); + uint16_t oc; + int ret; + + if ((len != 0x0400) || (param0 & CHSC_SDA_0_RES)) { + resp_code = 0x0003; + goto out; + } + + if (param0 & CHSC_SDA_0_FMT) { + resp_code = 0x0007; + goto out; + } + + oc = param0 & CHSC_SDA_0_OC; + switch (oc) { + case CHSC_SDA_OC_MCSSE: + ret = css_enable_mcsse(); + if (ret == -EINVAL) { + resp_code = 0x0101; + goto out; + } + break; + case CHSC_SDA_OC_MSS: + ret = css_enable_mss(); + if (ret == -EINVAL) { + resp_code = 0x0101; + goto out; + } + break; + default: + resp_code = 0x0003; + goto out; + } + +out: + res->code = cpu_to_be16(resp_code); + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->param = 0; +} + +static void ioinst_handle_chsc_unimplemented(ChscResp *res) +{ + res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); + res->code = cpu_to_be16(0x0004); + res->param = 0; +} + +int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb) +{ + ChscReq *req; + ChscResp *res; + uint64_t addr; + int reg; + uint16_t len; + uint16_t command; + hwaddr map_size = TARGET_PAGE_SIZE; + int ret = 0; + + trace_ioinst("chsc"); + reg = (ipb >> 20) & 0x00f; + addr = env->regs[reg]; + /* Page boundary? */ + if (addr & 0xfff) { + program_interrupt(env, PGM_SPECIFICATION, 2); + return -EIO; + } + req = s390_cpu_physical_memory_map(env, addr, &map_size, 1); + if (!req || map_size != TARGET_PAGE_SIZE) { + program_interrupt(env, PGM_SPECIFICATION, 2); + ret = -EIO; + goto out; + } + len = be16_to_cpu(req->len); + /* Length field valid? 
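+ A request must be 16..4088 bytes long and a multiple of 8; the 4088
+ cap also leaves at least CHSC_MIN_RESP_LEN bytes of the mapped page
+ for the response block placed at req + len below.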
*/ + if ((len < 16) || (len > 4088) || (len & 7)) { + program_interrupt(env, PGM_OPERAND, 2); + ret = -EIO; + goto out; + } + memset((char *)req + len, 0, TARGET_PAGE_SIZE - len); + res = (void *)((char *)req + len); + command = be16_to_cpu(req->command); + trace_ioinst_chsc_cmd(command, len); + switch (command) { + case CHSC_SCSC: + ioinst_handle_chsc_scsc(req, res); + break; + case CHSC_SCPD: + ioinst_handle_chsc_scpd(req, res); + break; + case CHSC_SDA: + ioinst_handle_chsc_sda(req, res); + break; + default: + ioinst_handle_chsc_unimplemented(res); + break; + } + +out: + s390_cpu_physical_memory_unmap(env, req, map_size, 1); + return ret; +} + +int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb) +{ + uint64_t addr; + int lowcore; + IOIntCode *int_code; + hwaddr len, orig_len; + int ret; + + trace_ioinst("tpi"); + addr = decode_basedisp_s(env, ipb); + lowcore = addr ? 0 : 1; + len = lowcore ? 8 /* two words */ : 12 /* three words */; + orig_len = len; + int_code = s390_cpu_physical_memory_map(env, addr, &len, 1); + if (!int_code || (len != orig_len)) { + program_interrupt(env, PGM_SPECIFICATION, 2); + ret = -EIO; + goto out; + } + ret = css_do_tpi(int_code, lowcore); +out: + s390_cpu_physical_memory_unmap(env, int_code, len, 1); + return ret; +} + +#define SCHM_REG1_RES(_reg) (_reg & 0x000000000ffffffc) +#define SCHM_REG1_MBK(_reg) ((_reg & 0x00000000f0000000) >> 28) +#define SCHM_REG1_UPD(_reg) ((_reg & 0x0000000000000002) >> 1) +#define SCHM_REG1_DCT(_reg) (_reg & 0x0000000000000001) + +int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2, + uint32_t ipb) +{ + uint8_t mbk; + int update; + int dct; + + trace_ioinst("schm"); + + if (SCHM_REG1_RES(reg1)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + mbk = SCHM_REG1_MBK(reg1); + update = SCHM_REG1_UPD(reg1); + dct = SCHM_REG1_DCT(reg1); + + if (update && (reg2 & 0x0000000000000fff)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + css_do_schm(mbk, update, dct, update ? reg2 : 0); + + return 0; +} + +int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1) +{ + int cssid, ssid, schid, m; + SubchDev *sch; + int ret = -ENODEV; + int cc; + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + trace_ioinst_sch_id("rsch", cssid, ssid, schid); + sch = css_find_subch(m, cssid, ssid, schid); + if (sch && css_subch_visible(sch)) { + ret = css_do_rsch(sch); + } + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EINVAL: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + cc = 1; + break; + } + + return cc; + +} + +#define RCHP_REG1_RES(_reg) (_reg & 0x00000000ff00ff00) +#define RCHP_REG1_CSSID(_reg) ((_reg & 0x0000000000ff0000) >> 16) +#define RCHP_REG1_CHPID(_reg) (_reg & 0x00000000000000ff) +int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1) +{ + int cc; + uint8_t cssid; + uint8_t chpid; + int ret; + + if (RCHP_REG1_RES(reg1)) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + cssid = RCHP_REG1_CSSID(reg1); + chpid = RCHP_REG1_CHPID(reg1); + + trace_ioinst_chp_id("rchp", cssid, chpid); + + ret = css_do_rchp(cssid, chpid); + + switch (ret) { + case -ENODEV: + cc = 3; + break; + case -EBUSY: + cc = 2; + break; + case 0: + cc = 0; + break; + default: + /* Invalid channel subsystem. 
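+ Any css_do_rchp result other than 0, -ENODEV or -EBUSY means the
+ cssid itself was rejected, which rates an operand exception rather
+ than a condition code.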
*/ + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + + return cc; +} + +#define SAL_REG1_INVALID(_reg) (_reg & 0x0000000080000000) +int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1) +{ + /* We do not provide address limit checking, so let's suppress it. */ + if (SAL_REG1_INVALID(reg1) || reg1 & 0x000000000000ffff) { + program_interrupt(env, PGM_OPERAND, 2); + return -EIO; + } + return 0; +} diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h new file mode 100644 index 0000000..d5a43f4 --- /dev/null +++ b/target-s390x/ioinst.h @@ -0,0 +1,230 @@ +/* + * S/390 channel I/O instructions + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. +*/ + +#ifndef IOINST_S390X_H +#define IOINST_S390X_H +/* + * Channel I/O related definitions, as defined in the Principles + * Of Operation (and taken from the Linux implementation). + */ + +/* subchannel status word (command mode only) */ +typedef struct SCSW { + uint16_t flags; + uint16_t ctrl; + uint32_t cpa; + uint8_t dstat; + uint8_t cstat; + uint16_t count; +} QEMU_PACKED SCSW; + +#define SCSW_FLAGS_MASK_KEY 0xf000 +#define SCSW_FLAGS_MASK_SCTL 0x0800 +#define SCSW_FLAGS_MASK_ESWF 0x0400 +#define SCSW_FLAGS_MASK_CC 0x0300 +#define SCSW_FLAGS_MASK_FMT 0x0080 +#define SCSW_FLAGS_MASK_PFCH 0x0040 +#define SCSW_FLAGS_MASK_ISIC 0x0020 +#define SCSW_FLAGS_MASK_ALCC 0x0010 +#define SCSW_FLAGS_MASK_SSI 0x0008 +#define SCSW_FLAGS_MASK_ZCC 0x0004 +#define SCSW_FLAGS_MASK_ECTL 0x0002 +#define SCSW_FLAGS_MASK_PNO 0x0001 + +#define SCSW_CTRL_MASK_FCTL 0x7000 +#define SCSW_CTRL_MASK_ACTL 0x0fe0 +#define SCSW_CTRL_MASK_STCTL 0x001f + +#define SCSW_FCTL_CLEAR_FUNC 0x1000 +#define SCSW_FCTL_HALT_FUNC 0x2000 +#define SCSW_FCTL_START_FUNC 0x4000 + +#define SCSW_ACTL_SUSP 0x0020 +#define SCSW_ACTL_DEVICE_ACTIVE 0x0040 +#define SCSW_ACTL_SUBCH_ACTIVE 0x0080 +#define SCSW_ACTL_CLEAR_PEND 0x0100 +#define SCSW_ACTL_HALT_PEND 0x0200 +#define SCSW_ACTL_START_PEND 0x0400 +#define SCSW_ACTL_RESUME_PEND 0x0800 + +#define SCSW_STCTL_STATUS_PEND 0x0001 +#define SCSW_STCTL_SECONDARY 0x0002 +#define SCSW_STCTL_PRIMARY 0x0004 +#define SCSW_STCTL_INTERMEDIATE 0x0008 +#define SCSW_STCTL_ALERT 0x0010 + +#define SCSW_DSTAT_ATTENTION 0x80 +#define SCSW_DSTAT_STAT_MOD 0x40 +#define SCSW_DSTAT_CU_END 0x20 +#define SCSW_DSTAT_BUSY 0x10 +#define SCSW_DSTAT_CHANNEL_END 0x08 +#define SCSW_DSTAT_DEVICE_END 0x04 +#define SCSW_DSTAT_UNIT_CHECK 0x02 +#define SCSW_DSTAT_UNIT_EXCEP 0x01 + +#define SCSW_CSTAT_PCI 0x80 +#define SCSW_CSTAT_INCORR_LEN 0x40 +#define SCSW_CSTAT_PROG_CHECK 0x20 +#define SCSW_CSTAT_PROT_CHECK 0x10 +#define SCSW_CSTAT_DATA_CHECK 0x08 +#define SCSW_CSTAT_CHN_CTRL_CHK 0x04 +#define SCSW_CSTAT_INTF_CTRL_CHK 0x02 +#define SCSW_CSTAT_CHAIN_CHECK 0x01 + +/* path management control word */ +typedef struct PMCW { + uint32_t intparm; + uint16_t flags; + uint16_t devno; + uint8_t lpm; + uint8_t pnom; + uint8_t lpum; + uint8_t pim; + uint16_t mbi; + uint8_t pom; + uint8_t pam; + uint8_t chpid[8]; + uint32_t chars; +} QEMU_PACKED PMCW; + +#define PMCW_FLAGS_MASK_QF 0x8000 +#define PMCW_FLAGS_MASK_W 0x4000 +#define PMCW_FLAGS_MASK_ISC 0x3800 +#define PMCW_FLAGS_MASK_ENA 0x0080 +#define PMCW_FLAGS_MASK_LM 0x0060 +#define PMCW_FLAGS_MASK_MME 0x0018 +#define PMCW_FLAGS_MASK_MP 0x0004 +#define PMCW_FLAGS_MASK_TF 0x0002 +#define PMCW_FLAGS_MASK_DNV 0x0001 +#define 
PMCW_FLAGS_MASK_INVALID 0x0700 + +#define PMCW_CHARS_MASK_ST 0x00e00000 +#define PMCW_CHARS_MASK_MBFC 0x00000004 +#define PMCW_CHARS_MASK_XMWME 0x00000002 +#define PMCW_CHARS_MASK_CSENSE 0x00000001 +#define PMCW_CHARS_MASK_INVALID 0xff1ffff8 + +/* subchannel information block */ +typedef struct SCHIB { + PMCW pmcw; + SCSW scsw; + uint64_t mba; + uint8_t mda[4]; +} QEMU_PACKED SCHIB; + +/* interruption response block */ +typedef struct IRB { + SCSW scsw; + uint32_t esw[5]; + uint32_t ecw[8]; + uint32_t emw[8]; +} QEMU_PACKED IRB; + +/* operation request block */ +typedef struct ORB { + uint32_t intparm; + uint16_t ctrl0; + uint8_t lpm; + uint8_t ctrl1; + uint32_t cpa; +} QEMU_PACKED ORB; + +#define ORB_CTRL0_MASK_KEY 0xf000 +#define ORB_CTRL0_MASK_SPND 0x0800 +#define ORB_CTRL0_MASK_STR 0x0400 +#define ORB_CTRL0_MASK_MOD 0x0200 +#define ORB_CTRL0_MASK_SYNC 0x0100 +#define ORB_CTRL0_MASK_FMT 0x0080 +#define ORB_CTRL0_MASK_PFCH 0x0040 +#define ORB_CTRL0_MASK_ISIC 0x0020 +#define ORB_CTRL0_MASK_ALCC 0x0010 +#define ORB_CTRL0_MASK_SSIC 0x0008 +#define ORB_CTRL0_MASK_C64 0x0002 +#define ORB_CTRL0_MASK_I2K 0x0001 +#define ORB_CTRL0_MASK_INVALID 0x0004 + +#define ORB_CTRL1_MASK_ILS 0x80 +#define ORB_CTRL1_MASK_MIDAW 0x40 +#define ORB_CTRL1_MASK_ORBX 0x01 +#define ORB_CTRL1_MASK_INVALID 0x3e + +/* channel command word (type 1) */ +typedef struct CCW1 { + uint8_t cmd_code; + uint8_t flags; + uint16_t count; + uint32_t cda; +} QEMU_PACKED CCW1; + +#define CCW_FLAG_DC 0x80 +#define CCW_FLAG_CC 0x40 +#define CCW_FLAG_SLI 0x20 +#define CCW_FLAG_SKIP 0x10 +#define CCW_FLAG_PCI 0x08 +#define CCW_FLAG_IDA 0x04 +#define CCW_FLAG_SUSPEND 0x02 + +#define CCW_CMD_NOOP 0x03 +#define CCW_CMD_BASIC_SENSE 0x04 +#define CCW_CMD_TIC 0x08 +#define CCW_CMD_SENSE_ID 0xe4 + +typedef struct CRW { + uint16_t flags; + uint16_t rsid; +} QEMU_PACKED CRW; + +#define CRW_FLAGS_MASK_S 0x4000 +#define CRW_FLAGS_MASK_R 0x2000 +#define CRW_FLAGS_MASK_C 0x1000 +#define CRW_FLAGS_MASK_RSC 0x0f00 +#define CRW_FLAGS_MASK_A 0x0080 +#define CRW_FLAGS_MASK_ERC 0x003f + +#define CRW_ERC_INIT 0x02 +#define CRW_ERC_IPI 0x04 + +#define CRW_RSC_SUBCH 0x3 +#define CRW_RSC_CHP 0x4 + +/* I/O interruption code */ +typedef struct IOIntCode { + uint32_t subsys_id; + uint32_t intparm; + uint32_t interrupt_id; +} QEMU_PACKED IOIntCode; + +/* schid disintegration */ +#define IOINST_SCHID_ONE(_schid) ((_schid & 0x00010000) >> 16) +#define IOINST_SCHID_M(_schid) ((_schid & 0x00080000) >> 19) +#define IOINST_SCHID_CSSID(_schid) ((_schid & 0xff000000) >> 24) +#define IOINST_SCHID_SSID(_schid) ((_schid & 0x00060000) >> 17) +#define IOINST_SCHID_NR(_schid) (_schid & 0x0000ffff) + +int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid); +int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb); +int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb); +int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2, + uint32_t ipb); +int ioinst_handle_rsch(CPUS390XState *env, uint64_t 
reg1); +int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1); +int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1); + +#endif diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index add6a58..2c24182 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -47,9 +47,29 @@ #define IPA0_DIAG 0x8300 #define IPA0_SIGP 0xae00 -#define IPA0_PRIV 0xb200 +#define IPA0_B2 0xb200 +#define IPA0_B9 0xb900 +#define IPA0_EB 0xeb00 #define PRIV_SCLP_CALL 0x20 +#define PRIV_CSCH 0x30 +#define PRIV_HSCH 0x31 +#define PRIV_MSCH 0x32 +#define PRIV_SSCH 0x33 +#define PRIV_STSCH 0x34 +#define PRIV_TSCH 0x35 +#define PRIV_TPI 0x36 +#define PRIV_SAL 0x37 +#define PRIV_RSCH 0x38 +#define PRIV_STCRW 0x39 +#define PRIV_STCPS 0x3a +#define PRIV_RCHP 0x3b +#define PRIV_SCHM 0x3c +#define PRIV_CHSC 0x5f +#define PRIV_SIGA 0x74 +#define PRIV_XSCH 0x76 +#define PRIV_SQBS 0x8a +#define PRIV_EQBS 0x9c #define DIAG_KVM_HYPERCALL 0x500 #define DIAG_KVM_BREAKPOINT 0x501 @@ -76,6 +96,11 @@ int kvm_arch_init(KVMState *s) return 0; } +unsigned long kvm_arch_vcpu_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + int kvm_arch_init_vcpu(CPUState *cpu) { int ret = 0; @@ -375,10 +400,123 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, return 0; } -static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) +static int kvm_handle_css_inst(S390CPU *cpu, struct kvm_run *run, + uint8_t ipa0, uint8_t ipa1, uint8_t ipb) +{ + int r = 0; + int no_cc = 0; + CPUS390XState *env = &cpu->env; + + if (ipa0 != 0xb2) { + /* Not handled for now. */ + return -1; + } + cpu_synchronize_state(env); + switch (ipa1) { + case PRIV_XSCH: + r = ioinst_handle_xsch(env, env->regs[1]); + break; + case PRIV_CSCH: + r = ioinst_handle_csch(env, env->regs[1]); + break; + case PRIV_HSCH: + r = ioinst_handle_hsch(env, env->regs[1]); + break; + case PRIV_MSCH: + r = ioinst_handle_msch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_SSCH: + r = ioinst_handle_ssch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_STCRW: + r = ioinst_handle_stcrw(env, run->s390_sieic.ipb); + break; + case PRIV_STSCH: + r = ioinst_handle_stsch(env, env->regs[1], run->s390_sieic.ipb); + break; + case PRIV_TSCH: + /* We should only get tsch via KVM_EXIT_S390_TSCH. */ + fprintf(stderr, "Spurious tsch intercept\n"); + break; + case PRIV_CHSC: + r = ioinst_handle_chsc(env, run->s390_sieic.ipb); + break; + case PRIV_TPI: + /* This should have been handled by kvm already. */ + fprintf(stderr, "Spurious tpi intercept\n"); + break; + case PRIV_SCHM: + no_cc = 1; + r = ioinst_handle_schm(env, env->regs[1], env->regs[2], + run->s390_sieic.ipb); + break; + case PRIV_RSCH: + r = ioinst_handle_rsch(env, env->regs[1]); + break; + case PRIV_RCHP: + r = ioinst_handle_rchp(env, env->regs[1]); + break; + case PRIV_STCPS: + /* We do not provide this instruction, it is suppressed. 
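+ With no_cc set and r = 0 below, store channel path status behaves as
+ a nop and the guest's condition code is left untouched.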
*/ + no_cc = 1; + r = 0; + break; + case PRIV_SAL: + no_cc = 1; + r = ioinst_handle_sal(env, env->regs[1]); + break; + default: + r = -1; + break; + } + + if (r >= 0) { + if (!no_cc) { + setcc(cpu, r); + } + r = 0; + } else if (r < -1) { + r = 0; + } + return r; +} + +static int is_ioinst(uint8_t ipa0, uint8_t ipa1, uint8_t ipb) +{ + int ret = 0; + uint16_t ipa = (ipa0 << 8) | ipa1; + + switch (ipa) { + case IPA0_B2 | PRIV_CSCH: + case IPA0_B2 | PRIV_HSCH: + case IPA0_B2 | PRIV_MSCH: + case IPA0_B2 | PRIV_SSCH: + case IPA0_B2 | PRIV_STSCH: + case IPA0_B2 | PRIV_TPI: + case IPA0_B2 | PRIV_SAL: + case IPA0_B2 | PRIV_RSCH: + case IPA0_B2 | PRIV_STCRW: + case IPA0_B2 | PRIV_STCPS: + case IPA0_B2 | PRIV_RCHP: + case IPA0_B2 | PRIV_SCHM: + case IPA0_B2 | PRIV_CHSC: + case IPA0_B2 | PRIV_SIGA: + case IPA0_B2 | PRIV_XSCH: + case IPA0_B9 | PRIV_EQBS: + case IPA0_EB | PRIV_SQBS: + ret = 1; + break; + } + + return ret; +} + +static int handle_priv(S390CPU *cpu, struct kvm_run *run, + uint8_t ipa0, uint8_t ipa1) { int r = 0; uint16_t ipbh0 = (run->s390_sieic.ipb & 0xffff0000) >> 16; + uint8_t ipb = run->s390_sieic.ipb & 0xff; dprintf("KVM: PRIV: %d\n", ipa1); switch (ipa1) { @@ -386,8 +524,16 @@ static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) r = kvm_sclp_service_call(cpu, run, ipbh0); break; default: - dprintf("KVM: unknown PRIV: 0x%x\n", ipa1); - r = -1; + if (is_ioinst(ipa0, ipa1, ipb)) { + r = kvm_handle_css_inst(cpu, run, ipa0, ipa1, ipb); + if (r == -1) { + setcc(cpu, 3); + r = 0; + } + } else { + dprintf("KVM: unknown PRIV: 0x%x\n", ipa1); + r = -1; + } break; } @@ -528,15 +674,17 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) dprintf("handle_instruction 0x%x 0x%x\n", run->s390_sieic.ipa, run->s390_sieic.ipb); switch (ipa0) { - case IPA0_PRIV: - r = handle_priv(cpu, run, ipa1); - break; - case IPA0_DIAG: - r = handle_diag(env, run, ipb_code); - break; - case IPA0_SIGP: - r = handle_sigp(cpu, run, ipa1); - break; + case IPA0_B2: + case IPA0_B9: + case IPA0_EB: + r = handle_priv(cpu, run, ipa0 >> 8, ipa1); + break; + case IPA0_DIAG: + r = handle_diag(env, run, ipb_code); + break; + case IPA0_SIGP: + r = handle_sigp(cpu, run, ipa1); + break; } if (r < 0) { @@ -595,6 +743,43 @@ static int handle_intercept(S390CPU *cpu) return r; } +static int handle_tsch(S390CPU *cpu) +{ + CPUS390XState *env = &cpu->env; + CPUState *cs = CPU(cpu); + struct kvm_run *run = cs->kvm_run; + int ret; + + cpu_synchronize_state(env); + ret = ioinst_handle_tsch(env, env->regs[1], run->s390_tsch.ipb); + if (ret >= 0) { + /* Success; set condition code. */ + setcc(cpu, ret); + ret = 0; + } else if (ret < -1) { + /* + * Failure. + * If an I/O interrupt had been dequeued, we have to reinject it. 
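+ * The type word below packs cssid/ssid from subchannel_id in the same
+ * layout kvm_s390_io_interrupt uses, so the reinjected interrupt
+ * matches the one that was dequeued.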
+ */ + if (run->s390_tsch.dequeued) { + uint16_t subchannel_id = run->s390_tsch.subchannel_id; + uint16_t subchannel_nr = run->s390_tsch.subchannel_nr; + uint32_t io_int_parm = run->s390_tsch.io_int_parm; + uint32_t io_int_word = run->s390_tsch.io_int_word; + uint32_t type = ((subchannel_id & 0xff00) << 24) | + ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16); + + kvm_s390_interrupt_internal(cpu, type, + ((uint32_t)subchannel_id << 16) + | subchannel_nr, + ((uint64_t)io_int_parm << 32) + | io_int_word, 1); + } + ret = 0; + } + return ret; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { S390CPU *cpu = S390_CPU(cs); @@ -607,6 +792,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_S390_RESET: qemu_system_reset_request(); break; + case KVM_EXIT_S390_TSCH: + ret = handle_tsch(cpu); + break; default: fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason); break; @@ -632,3 +820,33 @@ int kvm_arch_on_sigbus(int code, void *addr) { return 1; } + +void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id, + uint16_t subchannel_nr, uint32_t io_int_parm, + uint32_t io_int_word) +{ + uint32_t type; + + type = ((subchannel_id & 0xff00) << 24) | + ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16); + kvm_s390_interrupt_internal(cpu, type, + ((uint32_t)subchannel_id << 16) | subchannel_nr, + ((uint64_t)io_int_parm << 32) | io_int_word, 1); +} + +void kvm_s390_crw_mchk(S390CPU *cpu) +{ + kvm_s390_interrupt_internal(cpu, KVM_S390_MCHK, 1 << 28, + 0x00400f1d40330000, 1); +} + +void kvm_s390_enable_css_support(S390CPU *cpu) +{ + struct kvm_enable_cap cap = {}; + int r; + + /* Activate host kernel channel subsystem support. */ + cap.cap = KVM_CAP_S390_CSS_SUPPORT; + r = kvm_vcpu_ioctl(CPU(cpu), KVM_ENABLE_CAP, &cap); + assert(r == 0); +} diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c index 884c101..c120440 100644 --- a/target-unicore32/cpu.c +++ b/target-unicore32/cpu.c @@ -22,6 +22,22 @@ static inline void set_feature(CPUUniCore32State *env, int feature) /* CPU models */ +static ObjectClass *uc32_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + if (cpu_model == NULL) { + return NULL; + } + + oc = object_class_by_name(cpu_model); + if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU) || + object_class_is_abstract(oc))) { + oc = NULL; + } + return oc; +} + typedef struct UniCore32CPUInfo { const char *name; void (*instance_init)(Object *obj); @@ -80,6 +96,13 @@ static void uc32_cpu_initfn(Object *obj) tlb_flush(env, 1); } +static void uc32_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + + cc->class_by_name = uc32_cpu_class_by_name; +} + static void uc32_register_cpu_type(const UniCore32CPUInfo *info) { TypeInfo type_info = { @@ -88,7 +111,7 @@ static void uc32_register_cpu_type(const UniCore32CPUInfo *info) .instance_init = info->instance_init, }; - type_register_static(&type_info); + type_register(&type_info); } static const TypeInfo uc32_cpu_type_info = { @@ -98,6 +121,7 @@ static const TypeInfo uc32_cpu_type_info = { .instance_init = uc32_cpu_initfn, .abstract = true, .class_size = sizeof(UniCore32CPUClass), + .class_init = uc32_cpu_class_init, }; static void uc32_cpu_register_types(void) diff --git a/target-unicore32/helper.c b/target-unicore32/helper.c index 5359538..183b5b3 100644 --- a/target-unicore32/helper.c +++ b/target-unicore32/helper.c @@ -29,12 +29,14 @@ CPUUniCore32State *uc32_cpu_init(const char *cpu_model) { UniCore32CPU *cpu; 
CPUUniCore32State *env; + ObjectClass *oc; static int inited = 1; - if (object_class_by_name(cpu_model) == NULL) { + oc = cpu_class_by_name(TYPE_UNICORE32_CPU, cpu_model); + if (oc == NULL) { return NULL; } - cpu = UNICORE32_CPU(object_new(cpu_model)); + cpu = UNICORE32_CPU(object_new(object_class_get_name(oc))); env = &cpu->env; if (inited) { diff --git a/tests/.gitignore b/tests/.gitignore index f9041f3..38c94ef 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -10,4 +10,5 @@ test-qmp-commands.h test-qmp-commands test-qmp-input-strict test-qmp-marshal.c +test-x86-cpuid *-test diff --git a/tests/Makefile b/tests/Makefile index 442b286..c681ceb 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -45,6 +45,11 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c check-unit-y += tests/test-thread-pool$(EXESUF) gcov-files-test-thread-pool-y = thread-pool.c +gcov-files-test-hbitmap-y = util/hbitmap.c +check-unit-y += tests/test-hbitmap$(EXESUF) +check-unit-y += tests/test-x86-cpuid$(EXESUF) +# all code tested by test-x86-cpuid is inside topology.h +gcov-files-test-x86-cpuid-y = check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -72,12 +77,15 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-coroutine.o tests/test-string-output-visitor.o \ tests/test-string-input-visitor.o tests/test-qmp-output-visitor.o \ tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ - tests/test-qmp-commands.o tests/test-visitor-serialization.o + tests/test-qmp-commands.o tests/test-visitor-serialization.o \ + tests/test-x86-cpuid.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o $(test-obj-y): QEMU_INCLUDES += -Itests +tests/test-x86-cpuid.o: QEMU_INCLUDES += -I$(SRC_PATH)/target-i386 + tests/check-qint$(EXESUF): tests/check-qint.o libqemuutil.a tests/check-qstring$(EXESUF): tests/check-qstring.o libqemuutil.a tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a @@ -88,6 +96,8 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a +tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a +tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index c6eb851..b040820 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') + def test_small_buffer(self): + self.assert_no_active_mirrors() + + # A small buffer is rounded up automatically + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=4096, target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_small_buffer2(self): + self.assert_no_active_mirrors() + + qemu_img('create', 
'-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d' + % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=65536, mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + def test_large_cluster(self): self.assert_no_active_mirrors() @@ -292,6 +323,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') + def test_large_cluster(self): + self.assert_no_active_mirrors() + + # qemu-img create fails if the image is not there + qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d' + %(TestMirrorNoBacking.image_len), target_backing_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' + % (TestMirrorNoBacking.image_len, target_backing_img), target_img) + os.remove(target_backing_img) + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + class TestReadErrors(ImageMirroringTestCase): image_len = 2 * 1024 * 1024 # MB @@ -330,6 +382,9 @@ new_state = "1" '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' % (self.blkdebug_file, backing_img), test_img) + # Write something for tests that use sync='top' + qemu_io('-c', 'write %d 512' % (self.MIRROR_GRANULARITY + 65536), + test_img) self.vm = iotests.VM().add_drive(test_img) self.vm.launch() @@ -383,6 +438,32 @@ new_state = "1" self.complete_and_wait() self.vm.shutdown() + def test_large_cluster(self): + self.assert_no_active_mirrors() + + # Test COW into the target image. The first half of the + # cluster at MIRROR_GRANULARITY has to be copied from + # backing_img, even though sync='top'. + qemu_img('create', '-f', iotests.imgfmt, '-ocluster_size=131072,backing_file=%s' %(backing_img), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='top', + on_source_error='ignore', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + # Detach blkdebug to compare images successfully + qemu_img('rebase', '-f', iotests.imgfmt, '-u', '-b', backing_img, test_img) + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + def test_stop_read(self): self.assert_no_active_mirrors() diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 71009c2..84bfd63 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -.................. +...................... 
---------------------------------------------------------------------- -Ran 18 tests +Ran 22 tests OK diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c new file mode 100644 index 0000000..8c902f2 --- /dev/null +++ b/tests/test-hbitmap.c @@ -0,0 +1,401 @@ +/* + * Hierarchical bitmap unit-tests. + * + * Copyright (C) 2012 Red Hat Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <glib.h> +#include <stdarg.h> +#include "qemu/hbitmap.h" + +#define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) + +#define L1 BITS_PER_LONG +#define L2 (BITS_PER_LONG * L1) +#define L3 (BITS_PER_LONG * L2) + +typedef struct TestHBitmapData { + HBitmap *hb; + unsigned long *bits; + size_t size; + int granularity; +} TestHBitmapData; + + +/* Check that the HBitmap and the shadow bitmap contain the same data, + * ignoring the same "first" bits. + */ +static void hbitmap_test_check(TestHBitmapData *data, + uint64_t first) +{ + uint64_t count = 0; + size_t pos; + int bit; + HBitmapIter hbi; + int64_t i, next; + + hbitmap_iter_init(&hbi, data->hb, first); + + i = first; + for (;;) { + next = hbitmap_iter_next(&hbi); + if (next < 0) { + next = data->size; + } + + while (i < next) { + pos = i >> LOG_BITS_PER_LONG; + bit = i & (BITS_PER_LONG - 1); + i++; + g_assert_cmpint(data->bits[pos] & (1UL << bit), ==, 0); + } + + if (next == data->size) { + break; + } + + pos = i >> LOG_BITS_PER_LONG; + bit = i & (BITS_PER_LONG - 1); + i++; + count++; + g_assert_cmpint(data->bits[pos] & (1UL << bit), !=, 0); + } + + if (first == 0) { + g_assert_cmpint(count << data->granularity, ==, hbitmap_count(data->hb)); + } +} + +/* This is provided instead of a test setup function so that the sizes + are kept in the test functions (and not in main()) */ +static void hbitmap_test_init(TestHBitmapData *data, + uint64_t size, int granularity) +{ + size_t n; + data->hb = hbitmap_alloc(size, granularity); + + n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG; + if (n == 0) { + n = 1; + } + data->bits = g_new0(unsigned long, n); + data->size = size; + data->granularity = granularity; + if (size) { + hbitmap_test_check(data, 0); + } +} + +static void hbitmap_test_teardown(TestHBitmapData *data, + const void *unused) +{ + if (data->hb) { + hbitmap_free(data->hb); + data->hb = NULL; + } + if (data->bits) { + g_free(data->bits); + data->bits = NULL; + } +} + +/* Set a range in the HBitmap and in the shadow "simple" bitmap. + * The two bitmaps are then tested against each other. + */ +static void hbitmap_test_set(TestHBitmapData *data, + uint64_t first, uint64_t count) +{ + hbitmap_set(data->hb, first, count); + while (count-- != 0) { + size_t pos = first >> LOG_BITS_PER_LONG; + int bit = first & (BITS_PER_LONG - 1); + first++; + + data->bits[pos] |= 1UL << bit; + } + + if (data->granularity == 0) { + hbitmap_test_check(data, 0); + } +} + +/* Reset a range in the HBitmap and in the shadow "simple" bitmap. 
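+ * As with hbitmap_test_set, the result is checked against the shadow
+ * bitmap, but only for granularity 0: a coarser hbitmap_reset clears
+ * whole granules, so the two representations may diverge.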
+ */ +static void hbitmap_test_reset(TestHBitmapData *data, + uint64_t first, uint64_t count) +{ + hbitmap_reset(data->hb, first, count); + while (count-- != 0) { + size_t pos = first >> LOG_BITS_PER_LONG; + int bit = first & (BITS_PER_LONG - 1); + first++; + + data->bits[pos] &= ~(1UL << bit); + } + + if (data->granularity == 0) { + hbitmap_test_check(data, 0); + } +} + +static void hbitmap_test_check_get(TestHBitmapData *data) +{ + uint64_t count = 0; + uint64_t i; + + for (i = 0; i < data->size; i++) { + size_t pos = i >> LOG_BITS_PER_LONG; + int bit = i & (BITS_PER_LONG - 1); + unsigned long val = data->bits[pos] & (1UL << bit); + count += hbitmap_get(data->hb, i); + g_assert_cmpint(hbitmap_get(data->hb, i), ==, val != 0); + } + g_assert_cmpint(count, ==, hbitmap_count(data->hb)); +} + +static void test_hbitmap_zero(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 0, 0); +} + +static void test_hbitmap_unaligned(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 + 23, 0); + hbitmap_test_set(data, 0, 1); + hbitmap_test_set(data, L3 + 22, 1); +} + +static void test_hbitmap_iter_empty(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L1, 0); +} + +static void test_hbitmap_iter_partial(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); + hbitmap_test_check(data, 1); + hbitmap_test_check(data, L1 - 1); + hbitmap_test_check(data, L1); + hbitmap_test_check(data, L1 * 2 - 1); + hbitmap_test_check(data, L2 - 1); + hbitmap_test_check(data, L2); + hbitmap_test_check(data, L2 + 1); + hbitmap_test_check(data, L2 + L1); + hbitmap_test_check(data, L2 + L1 * 2 - 1); + hbitmap_test_check(data, L2 * 2 - 1); + hbitmap_test_check(data, L2 * 2); + hbitmap_test_check(data, L2 * 2 + 1); + hbitmap_test_check(data, L2 * 2 + L1); + hbitmap_test_check(data, L2 * 2 + L1 * 2 - 1); + hbitmap_test_check(data, L3 / 2); +} + +static void test_hbitmap_set_all(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); +} + +static void test_hbitmap_get_all(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_set(data, 0, L3); + hbitmap_test_check_get(data); +} + +static void test_hbitmap_get_some(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, 10, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L1 - 1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L2 - 1, 1); + hbitmap_test_check_get(data); + hbitmap_test_set(data, L2, 1); + hbitmap_test_check_get(data); +} + +static void test_hbitmap_set_one(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, 10, 1); + hbitmap_test_set(data, L1 - 1, 1); + hbitmap_test_set(data, L1, 1); + hbitmap_test_set(data, L2 - 1, 1); + hbitmap_test_set(data, L2, 1); +} + +static void test_hbitmap_set_two_elem(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, 2 * L2, 0); + hbitmap_test_set(data, L1 - 1, 2); + hbitmap_test_set(data, L1 * 2 - 1, 4); + hbitmap_test_set(data, L1 * 4, L1 + 1); + hbitmap_test_set(data, L1 * 8 - 1, L1 + 1); + hbitmap_test_set(data, L2 - 1, 2); + hbitmap_test_set(data, L2 + L1 - 1, 8); + hbitmap_test_set(data, L2 + L1 * 4, L1 + 1); + hbitmap_test_set(data, L2 + L1 * 8 - 1, L1 + 1); +} + +static void 
test_hbitmap_set(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 3 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 5, L1 * 2 + 1); + hbitmap_test_set(data, L1 * 8 - 1, L1 * 2 + 1); + hbitmap_test_set(data, L2 - 1, L1 + 2); + hbitmap_test_set(data, L2 + L1 * 2 - 1, L1 + 2); + hbitmap_test_set(data, L2 + L1 * 4, L1 * 2 + 1); + hbitmap_test_set(data, L2 + L1 * 7 - 1, L1 * 2 + 1); + hbitmap_test_set(data, L2 * 2 - 1, L3 * 2 - L2 * 2); +} + +static void test_hbitmap_set_twice(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L1 * 3, 0); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_set(data, L1, 1); +} + +static void test_hbitmap_set_overlap(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_set(data, L1 * 2 - 1, L1 * 2 + 2); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_set(data, L1 * 8 - 1, L2); + hbitmap_test_set(data, L2, L1); + hbitmap_test_set(data, L2 - L1 - 1, L1 * 8 + 2); + hbitmap_test_set(data, L2, L3 - L2 + 1); + hbitmap_test_set(data, L3 - L1, L1 * 3); + hbitmap_test_set(data, L3 - 1, 3); + hbitmap_test_set(data, L3 - 1, L2); +} + +static void test_hbitmap_reset_empty(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3, 0); + hbitmap_test_reset(data, 0, L3); +} + +static void test_hbitmap_reset(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_reset(data, L1 * 2 - 1, L1 * 2 + 2); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_reset(data, L1 * 8 - 1, L2); + hbitmap_test_set(data, L2, L1); + hbitmap_test_reset(data, L2 - L1 - 1, L1 * 8 + 2); + hbitmap_test_set(data, L2, L3 - L2 + 1); + hbitmap_test_reset(data, L3 - L1, L1 * 3); + hbitmap_test_set(data, L3 - 1, 3); + hbitmap_test_reset(data, L3 - 1, L2); + hbitmap_test_set(data, 0, L3 * 2); + hbitmap_test_reset(data, 0, L1); + hbitmap_test_reset(data, 0, L2); + hbitmap_test_reset(data, L3, L3); + hbitmap_test_set(data, L3 / 2, L3); +} + +static void test_hbitmap_granularity(TestHBitmapData *data, + const void *unused) +{ + /* Note that hbitmap_test_check has to be invoked manually in this test. */ + hbitmap_test_init(data, L1, 1); + hbitmap_test_set(data, 0, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 2, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 0, 3); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); + hbitmap_test_reset(data, 0, 1); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); +} + +static void test_hbitmap_iter_granularity(TestHBitmapData *data, + const void *unused) +{ + HBitmapIter hbi; + + /* Note that hbitmap_test_check has to be invoked manually in this test. 
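+ With granularity 7 one bit covers 1 << 7 = 128 items, so the iterator
+ is expected to return positions rounded down to a multiple of 128.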
*/ + hbitmap_test_init(data, 131072 << 7, 7); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_test_set(data, (131072 << 7) - 8, 8); + hbitmap_iter_init(&hbi, data->hb, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + + hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); +} + +static void hbitmap_test_add(const char *testpath, + void (*test_func)(TestHBitmapData *data, const void *user_data)) +{ + g_test_add(testpath, TestHBitmapData, NULL, NULL, test_func, + hbitmap_test_teardown); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero); + hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned); + hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty); + hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial); + hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity); + hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all); + hbitmap_test_add("/hbitmap/get/some", test_hbitmap_get_some); + hbitmap_test_add("/hbitmap/set/all", test_hbitmap_set_all); + hbitmap_test_add("/hbitmap/set/one", test_hbitmap_set_one); + hbitmap_test_add("/hbitmap/set/two-elem", test_hbitmap_set_two_elem); + hbitmap_test_add("/hbitmap/set/general", test_hbitmap_set); + hbitmap_test_add("/hbitmap/set/twice", test_hbitmap_set_twice); + hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap); + hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty); + hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset); + hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity); + g_test_run(); + + return 0; +} diff --git a/tests/test-x86-cpuid.c b/tests/test-x86-cpuid.c new file mode 100644 index 0000000..8d9f96a --- /dev/null +++ b/tests/test-x86-cpuid.c @@ -0,0 +1,110 @@ +/* + * Test code for x86 CPUID and Topology functions + * + * Copyright (c) 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <glib.h> + +#include "topology.h" + +static void test_topo_bits(void) +{ + /* simple tests for 1 thread per core, 1 core per socket */ + g_assert_cmpuint(apicid_smt_width(1, 1), ==, 0); + g_assert_cmpuint(apicid_core_width(1, 1), ==, 0); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 0), ==, 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 1), ==, 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 2), ==, 2); + g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 3), ==, 3); + + + /* Test field width calculation for multiple values + */ + g_assert_cmpuint(apicid_smt_width(1, 2), ==, 1); + g_assert_cmpuint(apicid_smt_width(1, 3), ==, 2); + g_assert_cmpuint(apicid_smt_width(1, 4), ==, 2); + + g_assert_cmpuint(apicid_smt_width(1, 14), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 15), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 16), ==, 4); + g_assert_cmpuint(apicid_smt_width(1, 17), ==, 5); + + + g_assert_cmpuint(apicid_core_width(30, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(31, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(32, 2), ==, 5); + g_assert_cmpuint(apicid_core_width(33, 2), ==, 6); + + + /* build a weird topology and see if IDs are calculated correctly + */ + + /* This will use 2 bits for thread ID and 3 bits for core ID + */ + g_assert_cmpuint(apicid_smt_width(6, 3), ==, 2); + g_assert_cmpuint(apicid_core_width(6, 3), ==, 3); + g_assert_cmpuint(apicid_pkg_offset(6, 3), ==, 5); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 0), ==, 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1), ==, 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2), ==, 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 0), ==, + (1 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 1), ==, + (1 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 2), ==, + (1 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 0), ==, + (2 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 1), ==, + (2 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 2), ==, + (2 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 0), ==, + (5 << 2) | 0); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 1), ==, + (5 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 2), ==, + (5 << 2) | 2); + + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 0 * 3 + 0), ==, + (1 << 5)); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 1 * 3 + 1), ==, + (1 << 5) | (1 << 2) | 1); + g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 3 * 6 * 3 + 5 * 3 + 2), ==, + (3 << 5) | (5 << 2) | 2); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/cpuid/topology/basic", test_topo_bits); + + g_test_run(); + + return 0; +} diff --git a/trace-events b/trace-events index 09091e6..1011f27 100644 --- a/trace-events +++ b/trace-events @@ -79,10 +79,17 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) " # block/mirror.c mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_flush(void *s) "s %p" 
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" +mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64 +mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d" +mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" +mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d" +mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" +mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" # blockdev.c qmp_block_job_cancel(void *job) "job %p" @@ -1060,3 +1067,26 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" xics_ics_eoi(int nr) "ics_eoi: irq %#x" + +# hbitmap.c +hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" +hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 +hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 + +# target-s390x/ioinst.c +ioinst(const char *insn) "IOINST: %s" +ioinst_sch_id(const char *insn, int cssid, int ssid, int schid) "IOINST: %s (%x.%x.%04x)" +ioinst_chp_id(const char *insn, int cssid, int chpid) "IOINST: %s (%x.%02x)" +ioinst_chsc_cmd(uint16_t cmd, uint16_t len) "IOINST: chsc command %04x, len %04x" + +# hw/s390x/css.c +css_enable_facility(const char *facility) "CSS: enable %s" +css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char *chained) "CSS: queueing crw: rsc=%x, erc=%x, rsid=%x %s" +css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type) "CSS: add chpid %x.%02x (type %02x)" +css_new_image(uint8_t cssid, const char *default_cssid) "CSS: add css image %02x %s" +css_assign_subch(const char *do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno) "CSS: %s %x.%x.%04x (devno %04x)" +css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char *conditional) "CSS: I/O interrupt on sch %x.%x.%04x (intparm %08x, isc %x) %s" + +# hw/s390x/virtio-ccw.c +virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command %x" +virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno %04x (%s)" diff --git a/util/Makefile.objs b/util/Makefile.objs index 5baeb53..495a178 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -2,7 +2,7 @@ util-obj-y = osdep.o cutils.o qemu-timer-common.o util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o util-obj-y += envlist.o path.o host-utils.o cache-utils.o module.o -util-obj-y += bitmap.o bitops.o +util-obj-y += bitmap.o bitops.o hbitmap.o util-obj-y += acl.o util-obj-y += 
error.o qemu-error.o util-obj-$(CONFIG_POSIX) += compatfd.o diff --git a/util/hbitmap.c b/util/hbitmap.c new file mode 100644 index 0000000..2aa487d --- /dev/null +++ b/util/hbitmap.c @@ -0,0 +1,401 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include <string.h> +#include <glib.h> +#include <assert.h> +#include "qemu/osdep.h" +#include "qemu/hbitmap.h" +#include "qemu/host-utils.h" +#include "trace.h" + +/* An HBitmap provides an array of bits. The bits are stored as usual in an + * array of unsigned longs, but HBitmap is also optimized to provide fast + * iteration over set bits; going from one bit to the next is O(logB n) + * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough + * that the number of levels is in fact fixed. + * + * In order to do this, it stacks multiple bitmaps with progressively coarser + * granularity; in all levels except the last, bit N is set iff the N-th + * unsigned long is nonzero in the immediately next level. When iteration + * completes on the last level it can examine the 2nd-last level to quickly + * skip entire words, and even do so recursively to skip blocks of 64 words or + * powers thereof (32 on 32-bit machines). + * + * Given an index in the bitmap, it can be split into groups of bits like + * this (for the 64-bit case): + * + * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word + * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word + * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word + * + * So it is easy to move up simply by shifting the index right by + * log2(BITS_PER_LONG) bits. To move down, you shift the index left + * similarly, and add the word index within the group. Iteration uses + * ffs (find first set bit) to find the next word to examine; this + * operation can be done in constant time on most current architectures. + * + * When setting or clearing a range of m bits on all levels, the work to + * perform is O(m + m/W + m/W^2 + ...), which is O(m) as on a regular bitmap. + * + * When iterating on a bitmap, each bit (on any level) is only visited + * once. Hence, the total cost of visiting a bitmap with m bits in it is + * the number of bits that are set across all levels. Unless the bitmap is + * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized + * cost of advancing from one bit to the next is usually constant (worst case + * O(logB n) as in the non-amortized complexity). + */ + +struct HBitmap { + /* Number of total bits in the bottom level. */ + uint64_t size; + + /* Number of set bits in the bottom level. */ + uint64_t count; + + /* A scaling factor. Given a granularity of G, each bit in the bitmap + * actually represents a group of 2^G elements. Each operation on a + * range of bits first rounds the bits to determine which group they land + * in, and then affects the entire group; iteration will only visit the first + * bit of each group. 
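+ * (For example, a dirty bitmap that tracks 512-byte sectors with one bit + * per 64 KiB of disk would use G = 7, since 64 KiB covers 2^7 sectors.)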
Here is an example of operations in a size-16, + * granularity-1 HBitmap: + * + * initial state 00000000 + * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8) + * reset(start=1, count=3) 00111000 (iter: 4, 6, 8) + * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10) + * reset(start=5, count=5) 00000000 + * + * From an implementation point of view, when setting or resetting bits, + * the bitmap scales bit numbers right by this number of bits. When + * iterating, it scales bit numbers left by the same number of bits. + */ + int granularity; + + /* A number of progressively less coarse bitmaps (i.e. level 0 is the + * coarsest). Each bit in level N represents a word in level N+1 that + * has a set bit, except the last level where each bit represents the + * actual bitmap. + * + * Note that all bitmaps have the same number of levels. Even a 1-bit + * bitmap will still allocate HBITMAP_LEVELS arrays. + */ + unsigned long *levels[HBITMAP_LEVELS]; +}; + +static inline int popcountl(unsigned long l) +{ + return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l); +} + +/* Advance hbi to the next nonzero word and return it. hbi->pos + * is updated. Returns zero if we reach the end of the bitmap. + */ +unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) +{ + size_t pos = hbi->pos; + const HBitmap *hb = hbi->hb; + unsigned i = HBITMAP_LEVELS - 1; + + unsigned long cur; + do { + cur = hbi->cur[--i]; + pos >>= BITS_PER_LEVEL; + } while (cur == 0); + + /* Check for end of iteration. We always use fewer than BITS_PER_LONG + * bits in the level 0 bitmap; thus we can repurpose the most significant + * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures + * that the above loop ends even without an explicit check on i. + */ + + if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) { + return 0; + } + for (; i < HBITMAP_LEVELS - 1; i++) { + /* Shift back pos to the left, matching the right shifts above. + * The index of this word's least significant set bit provides + * the low-order bits. + */ + pos = (pos << BITS_PER_LEVEL) + ffsl(cur) - 1; + hbi->cur[i] = cur & (cur - 1); + + /* Set up next level for iteration. */ + cur = hb->levels[i + 1][pos]; + } + + hbi->pos = pos; + trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur); + + assert(cur); + return cur; +} + +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first) +{ + unsigned i, bit; + uint64_t pos; + + hbi->hb = hb; + pos = first >> hb->granularity; + assert(pos < hb->size); + hbi->pos = pos >> BITS_PER_LEVEL; + hbi->granularity = hb->granularity; + + for (i = HBITMAP_LEVELS; i-- > 0; ) { + bit = pos & (BITS_PER_LONG - 1); + pos >>= BITS_PER_LEVEL; + + /* Drop bits representing items before first. */ + hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1); + + /* We have already added level i+1, so the lowest set bit has + * been processed. Clear it. + */ + if (i != HBITMAP_LEVELS - 1) { + hbi->cur[i] &= ~(1UL << bit); + } + } +} + +bool hbitmap_empty(const HBitmap *hb) +{ + return hb->count == 0; +} + +int hbitmap_granularity(const HBitmap *hb) +{ + return hb->granularity; +} + +uint64_t hbitmap_count(const HBitmap *hb) +{ + return hb->count << hb->granularity; +} + +/* Count the number of set bits between start and last, not accounting for + * the granularity. Also an example of how to use hbitmap_iter_next_word. 
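+ * Note that start and last are already scaled down by the granularity; that + * is why the iterator is initialized with start shifted back up.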
+ */ +static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) +{ + HBitmapIter hbi; + uint64_t count = 0; + uint64_t end = last + 1; + unsigned long cur; + size_t pos; + + hbitmap_iter_init(&hbi, hb, start << hb->granularity); + for (;;) { + pos = hbitmap_iter_next_word(&hbi, &cur); + if (pos >= (end >> BITS_PER_LEVEL)) { + break; + } + count += popcountl(cur); + } + + if (pos == (end >> BITS_PER_LEVEL)) { + /* Drop bits representing the END-th and subsequent items. */ + int bit = end & (BITS_PER_LONG - 1); + cur &= (1UL << bit) - 1; + count += popcountl(cur); + } + + return count; +} + +/* Setting starts at the last layer and propagates up if an element + * changes from zero to non-zero. + */ +static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ + unsigned long mask; + bool changed; + + assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); + assert(start <= last); + + mask = 2UL << (last & (BITS_PER_LONG - 1)); + mask -= 1UL << (start & (BITS_PER_LONG - 1)); + changed = (*elem == 0); + *elem |= mask; + return changed; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ + size_t pos = start >> BITS_PER_LEVEL; + size_t lastpos = last >> BITS_PER_LEVEL; + bool changed = false; + size_t i; + + i = pos; + if (i < lastpos) { + uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + changed |= hb_set_elem(&hb->levels[level][i], start, next - 1); + for (;;) { + start = next; + next += BITS_PER_LONG; + if (++i == lastpos) { + break; + } + changed |= (hb->levels[level][i] == 0); + hb->levels[level][i] = ~0UL; + } + } + changed |= hb_set_elem(&hb->levels[level][i], start, last); + + /* If there was any change in this layer, we may have to update + * the one above. + */ + if (level > 0 && changed) { + hb_set_between(hb, level - 1, pos, lastpos); + } +} + +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) +{ + /* Compute range in the last layer. */ + uint64_t last = start + count - 1; + + trace_hbitmap_set(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + + start >>= hb->granularity; + last >>= hb->granularity; + count = last - start + 1; + + hb->count += count - hb_count_between(hb, start, last); + hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +/* Resetting works the other way round: propagate up if the new + * value is zero. + */ +static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ + unsigned long mask; + bool blanked; + + assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); + assert(start <= last); + + mask = 2UL << (last & (BITS_PER_LONG - 1)); + mask -= 1UL << (start & (BITS_PER_LONG - 1)); + blanked = *elem != 0 && ((*elem & ~mask) == 0); + *elem &= ~mask; + return blanked; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ + size_t pos = start >> BITS_PER_LEVEL; + size_t lastpos = last >> BITS_PER_LEVEL; + bool changed = false; + size_t i; + + i = pos; + if (i < lastpos) { + uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + + /* Here we need a more complex test than when setting bits. Even if + * something was changed, we must not blank bits in the upper level + * unless the lower-level word became entirely zero. So, remove pos + * from the upper-level range if bits remain set. 
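+ * (Incrementing pos here excludes this word from the range passed to the + * recursive call below, so parent bits are only cleared for words that + * actually became zero.)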
+ */ + if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) { + changed = true; + } else { + pos++; + } + + for (;;) { + start = next; + next += BITS_PER_LONG; + if (++i == lastpos) { + break; + } + changed |= (hb->levels[level][i] != 0); + hb->levels[level][i] = 0UL; + } + } + + /* Same as above, this time for lastpos. */ + if (hb_reset_elem(&hb->levels[level][i], start, last)) { + changed = true; + } else { + lastpos--; + } + + if (level > 0 && changed) { + hb_reset_between(hb, level - 1, pos, lastpos); + } +} + +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) +{ + /* Compute range in the last layer. */ + uint64_t last = start + count - 1; + + trace_hbitmap_reset(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + + start >>= hb->granularity; + last >>= hb->granularity; + + hb->count -= hb_count_between(hb, start, last); + hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +bool hbitmap_get(const HBitmap *hb, uint64_t item) +{ + /* Compute position and bit in the last layer. */ + uint64_t pos = item >> hb->granularity; + unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1)); + + return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; +} + +void hbitmap_free(HBitmap *hb) +{ + unsigned i; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + g_free(hb->levels[i]); + } + g_free(hb); +} + +HBitmap *hbitmap_alloc(uint64_t size, int granularity) +{ + HBitmap *hb = g_malloc0(sizeof (struct HBitmap)); + unsigned i; + + assert(granularity >= 0 && granularity < 64); + size = (size + (1ULL << granularity) - 1) >> granularity; + assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); + + hb->size = size; + hb->granularity = granularity; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + hb->levels[i] = g_malloc0(size * sizeof(unsigned long)); + } + + /* We necessarily have free bits in level 0 due to the definition + * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up + * hbitmap_iter_skip_words. 
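+ * (The allocation loop above divides size by BITS_PER_LONG, rounding up, at + * each level, so by the time level 0 is reached a single word remains; the + * assertion below checks exactly that.)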
*/ + assert(size == 1); + hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); + return hb; +} diff --git a/vl.c b/vl.c --- a/vl.c +++ b/vl.c @@ -176,6 +176,7 @@ int main(int argc, char **argv) #define DEFAULT_RAM_SIZE 128 #define MAX_VIRTIO_CONSOLES 1 +#define MAX_SCLP_CONSOLES 1 static const char *data_dir; const char *bios_name = NULL; @@ -203,6 +204,7 @@ int no_quit = 0; CharDriverState *serial_hds[MAX_SERIAL_PORTS]; CharDriverState *parallel_hds[MAX_PARALLEL_PORTS]; CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES]; +CharDriverState *sclp_hds[MAX_SCLP_CONSOLES]; int win2k_install_hack = 0; int singlestep = 0; int smp_cpus = 1; @@ -271,6 +273,7 @@ static int tcg_tb_size; static int default_serial = 1; static int default_parallel = 1; static int default_virtcon = 1; +static int default_sclp = 1; static int default_monitor = 1; static int default_floppy = 1; static int default_cdrom = 1; @@ -2340,6 +2343,7 @@ struct device_config { DEV_VIRTCON, /* -virtioconsole */ DEV_DEBUGCON, /* -debugcon */ DEV_GDB, /* -gdb, -s */ + DEV_SCLP, /* s390 sclp */ } type; const char *cmdline; Location loc; @@ -2458,6 +2462,39 @@ static int virtcon_parse(const char *devname) return 0; } +static int sclp_parse(const char *devname) +{ + QemuOptsList *device = qemu_find_opts("device"); + static int index = 0; + char label[32]; + QemuOpts *dev_opts; + + if (strcmp(devname, "none") == 0) { + return 0; + } + if (index == MAX_SCLP_CONSOLES) { + fprintf(stderr, "qemu: too many sclp consoles\n"); + exit(1); + } + + assert(arch_type == QEMU_ARCH_S390X); + + dev_opts = qemu_opts_create(device, NULL, 0, NULL); + qemu_opt_set(dev_opts, "driver", "sclpconsole"); + + snprintf(label, sizeof(label), "sclpcon%d", index); + sclp_hds[index] = qemu_chr_new(label, devname, NULL); + if (!sclp_hds[index]) { + fprintf(stderr, "qemu: could not connect sclp console" + " to character backend '%s'\n", devname); + return -1; + } + qemu_opt_set(dev_opts, "chardev", label); + + index++; + return 0; +} + static int debugcon_parse(const char *devname) { QemuOpts *opts; @@ -3615,6 +3652,7 @@ int main(int argc, char **argv, char **envp) default_serial = 0; default_parallel = 0; default_virtcon = 0; + default_sclp = 0; default_monitor = 0; default_net = 0; default_floppy = 0; @@ -3832,6 +3870,9 @@ int main(int argc, char **argv, char **envp) if (!machine->use_virtcon) { default_virtcon = 0; } + if (!machine->use_sclp) { + default_sclp = 0; + } if (machine->no_floppy) { default_floppy = 0; } @@ -3873,11 +3914,16 @@ int main(int argc, char **argv, char **envp) add_device_config(DEV_SERIAL, "mon:stdio"); } else if (default_virtcon && default_monitor) { add_device_config(DEV_VIRTCON, "mon:stdio"); + } else if (default_sclp && default_monitor) { + add_device_config(DEV_SCLP, "mon:stdio"); } else { if (default_serial) add_device_config(DEV_SERIAL, "stdio"); if (default_virtcon) add_device_config(DEV_VIRTCON, "stdio"); + if (default_sclp) { + add_device_config(DEV_SCLP, "stdio"); + } if (default_monitor) monitor_parse("stdio", "readline"); } @@ -3890,6 +3936,9 @@ int main(int argc, char **argv, char **envp) monitor_parse("vc:80Cx24C", "readline"); if (default_virtcon) add_device_config(DEV_VIRTCON, "vc:80Cx24C"); + if (default_sclp) { + add_device_config(DEV_SCLP, "vc:80Cx24C"); + } } socket_init(); @@ -4060,6 +4109,9 @@ int main(int argc, char **argv, char **envp) exit(1); if (foreach_device_config(DEV_VIRTCON, virtcon_parse) < 0) exit(1); + if (foreach_device_config(DEV_SCLP, sclp_parse) < 0) { + exit(1); + } if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) exit(1);
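As a closing illustration (not part of the series itself), here is a minimal, hypothetical user of the HBitmap API introduced above; it assumes only the functions defined in util/hbitmap.c and the iterator declarations in "qemu/hbitmap.h", plus <inttypes.h> for the print format:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include "qemu/hbitmap.h"

int main(void)
{
    HBitmap *hb = hbitmap_alloc(1 << 20, 3);  /* 1M items, 8 items per bit */
    HBitmapIter hbi;
    int64_t cur;

    hbitmap_set(hb, 100, 64);                 /* items 100..163 -> groups 12..20 */
    assert(hbitmap_get(hb, 100));
    assert(hbitmap_count(hb) == 9 << 3);      /* 9 dirty groups, rescaled to items */

    /* The iterator visits the first item of each dirty group: 96, 104, ..., 160. */
    hbitmap_iter_init(&hbi, hb, 0);
    while ((cur = hbitmap_iter_next(&hbi)) >= 0) {
        printf("dirty group starts at item %" PRId64 "\n", cur);
    }

    hbitmap_free(hb);
    return 0;
}

The rounding visible here (setting items 100..163 reports 96..167 as dirty) is the same behavior that the /hbitmap/granularity test exercises in the new test suite.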