diff options
187 files changed, 2965 insertions, 1236 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ef2ec58..7df0882 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1005,6 +1005,14 @@ S: Supported F: hw/vfio/* F: include/hw/vfio/ +vfio-ccw +M: Cornelia Huck <cornelia.huck@de.ibm.com> +S: Supported +F: hw/vfio/ccw.c +F: hw/s390x/s390-ccw.c +F: include/hw/s390x/s390-ccw.h +T: git git://github.com/cohuck/qemu.git s390-next + vhost M: Michael S. Tsirkin <mst@redhat.com> S: Supported diff --git a/block/backup.c b/block/backup.c index a4fb288..5387fbd 100644 --- a/block/backup.c +++ b/block/backup.c @@ -692,7 +692,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } if (job) { backup_clean(&job->common); - block_job_unref(&job->common); + block_job_early_fail(&job->common); } return NULL; diff --git a/block/commit.c b/block/commit.c index 76a0d98..a3028b2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -426,7 +426,7 @@ fail: if (commit_top_bs) { bdrv_set_backing_hd(overlay_bs, top, &error_abort); } - block_job_unref(&s->common); + block_job_early_fail(&s->common); } diff --git a/block/gluster.c b/block/gluster.c index 7c76cd0..8ba3bcc 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1275,7 +1275,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D3 or D4 */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like D4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } if (offs > start) { /* D2: in hole, next data at offs */ @@ -1307,7 +1314,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D1 and (H3 or H4) */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like H4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } if (offs > start) { /* @@ -26,6 +26,7 @@ #include "trace.h" #include "sysemu/block-backend.h" #include "block/blockjob.h" +#include "block/blockjob_int.h" #include "block/block_int.h" #include "qemu/cutils.h" #include "qapi/error.h" @@ -301,16 +302,9 @@ void bdrv_drain_all_begin(void) bool waited = true; BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; GSList *aio_ctxs = NULL, *ctx; - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_pause(job); - aio_context_release(aio_context); - } + block_job_pause_all(); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -354,7 +348,6 @@ void bdrv_drain_all_end(void) { BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -365,13 +358,7 @@ void bdrv_drain_all_end(void) aio_context_release(aio_context); } - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_resume(job); - aio_context_release(aio_context); - } + block_job_resume_all(); } void bdrv_drain_all(void) diff --git a/block/mirror.c b/block/mirror.c index e778ee0..a2a9703 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1257,7 +1257,7 @@ fail: g_free(s->replaces); blk_unref(s->target); - block_job_unref(&s->common); + block_job_early_fail(&s->common); } bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, @@ -3715,7 +3715,6 @@ void qmp_block_job_resume(const char *device, Error **errp) } trace_qmp_block_job_resume(job); - block_job_iostatus_reset(job); block_job_user_resume(job); aio_context_release(aio_context); } @@ -55,35 +55,20 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); -static char *child_job_get_parent_desc(BdrvChild *c) -{ - BlockJob *job = c->opaque; - return g_strdup_printf("%s job '%s'", - BlockJobType_lookup[job->driver->job_type], - job->id); -} - -static const BdrvChildRole child_job = { - .get_parent_desc = child_job_get_parent_desc, - .stay_at_node = true, -}; - -static void block_job_drained_begin(void *opaque) -{ - BlockJob *job = opaque; - block_job_pause(job); -} - -static void block_job_drained_end(void *opaque) -{ - BlockJob *job = opaque; - block_job_resume(job); -} - -static const BlockDevOps block_job_dev_ops = { - .drained_begin = block_job_drained_begin, - .drained_end = block_job_drained_end, -}; +/* + * The block job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. The monitor is + * peculiar in that it accesses the block job list with block_job_get, and + * therefore needs consistency across block_job_get and the actual operation + * (e.g. block_job_set_speed). The consistency is achieved with + * aio_context_acquire/release. These functions are declared in blockjob.h. + * + * The second includes functions used by the block job drivers and sometimes + * by the core block layer. These do not care about locking, because the + * whole coroutine runs under the AioContext lock, and are declared in + * blockjob_int.h. + */ BlockJob *block_job_next(BlockJob *job) { @@ -106,6 +91,80 @@ BlockJob *block_job_get(const char *id) return NULL; } +BlockJobTxn *block_job_txn_new(void) +{ + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void block_job_txn_ref(BlockJobTxn *txn) +{ + txn->refcnt++; +} + +void block_job_txn_unref(BlockJobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + block_job_txn_ref(txn); +} + +static void block_job_pause(BlockJob *job) +{ + job->pause_count++; +} + +static void block_job_resume(BlockJob *job) +{ + assert(job->pause_count > 0); + job->pause_count--; + if (job->pause_count) { + return; + } + block_job_enter(job); +} + +static void block_job_ref(BlockJob *job) +{ + ++job->refcnt; +} + +static void block_job_attached_aio_context(AioContext *new_context, + void *opaque); +static void block_job_detach_aio_context(void *opaque); + +static void block_job_unref(BlockJob *job) +{ + if (--job->refcnt == 0) { + BlockDriverState *bs = blk_bs(job->blk); + bs->job = NULL; + block_job_remove_all_bdrv(job); + blk_remove_aio_context_notifier(job->blk, + block_job_attached_aio_context, + block_job_detach_aio_context, job); + blk_unref(job->blk); + error_free(job->blocker); + g_free(job->id); + QLIST_REMOVE(job, job_list); + g_free(job); + } +} + static void block_job_attached_aio_context(AioContext *new_context, void *opaque) { @@ -145,6 +204,36 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + +static void block_job_drained_begin(void *opaque) +{ + BlockJob *job = opaque; + block_job_pause(job); +} + +static void block_job_drained_end(void *opaque) +{ + BlockJob *job = opaque; + block_job_resume(job); +} + +static const BlockDevOps block_job_dev_ops = { + .drained_begin = block_job_drained_begin, + .drained_end = block_job_drained_end, +}; + void block_job_remove_all_bdrv(BlockJob *job) { GSList *l; @@ -175,90 +264,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, return 0; } -void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, int64_t speed, int flags, - BlockCompletionFunc *cb, void *opaque, Error **errp) -{ - BlockBackend *blk; - BlockJob *job; - int ret; - - if (bs->job) { - error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - - if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { - job_id = bdrv_get_device_name(bs); - if (!*job_id) { - error_setg(errp, "An explicit job ID is required for this node"); - return NULL; - } - } - - if (job_id) { - if (flags & BLOCK_JOB_INTERNAL) { - error_setg(errp, "Cannot specify job ID for internal block job"); - return NULL; - } - - if (!id_wellformed(job_id)) { - error_setg(errp, "Invalid job ID '%s'", job_id); - return NULL; - } - - if (block_job_get(job_id)) { - error_setg(errp, "Job ID '%s' already in use", job_id); - return NULL; - } - } - - blk = blk_new(perm, shared_perm); - ret = blk_insert_bs(blk, bs, errp); - if (ret < 0) { - blk_unref(blk); - return NULL; - } - - job = g_malloc0(driver->instance_size); - job->driver = driver; - job->id = g_strdup(job_id); - job->blk = blk; - job->cb = cb; - job->opaque = opaque; - job->busy = false; - job->paused = true; - job->pause_count = 1; - job->refcnt = 1; - - error_setg(&job->blocker, "block device is in use by block job: %s", - BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); - bs->job = job; - - blk_set_dev_ops(blk, &block_job_dev_ops, job); - bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - - QLIST_INSERT_HEAD(&block_jobs, job, job_list); - - blk_add_aio_context_notifier(blk, block_job_attached_aio_context, - block_job_detach_aio_context, job); - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (local_err) { - block_job_unref(job); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - bool block_job_is_internal(BlockJob *job) { return (job->id == NULL); @@ -293,30 +298,10 @@ void block_job_start(BlockJob *job) bdrv_coroutine_enter(blk_bs(job->blk), job->co); } -void block_job_ref(BlockJob *job) -{ - ++job->refcnt; -} - -void block_job_unref(BlockJob *job) -{ - if (--job->refcnt == 0) { - BlockDriverState *bs = blk_bs(job->blk); - bs->job = NULL; - block_job_remove_all_bdrv(job); - blk_remove_aio_context_notifier(job->blk, - block_job_attached_aio_context, - block_job_detach_aio_context, job); - blk_unref(job->blk); - error_free(job->blocker); - g_free(job->id); - QLIST_REMOVE(job, job_list); - g_free(job); - } -} - static void block_job_completed_single(BlockJob *job) { + assert(job->completed); + if (!job->ret) { if (job->driver->commit) { job->driver->commit(job); @@ -354,11 +339,57 @@ static void block_job_completed_single(BlockJob *job) block_job_unref(job); } +static void block_job_cancel_async(BlockJob *job) +{ + if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) { + block_job_iostatus_reset(job); + } + if (job->user_paused) { + /* Do not call block_job_enter here, the caller will handle it. */ + job->user_paused = false; + job->pause_count--; + } + job->cancelled = true; +} + +static int block_job_finish_sync(BlockJob *job, + void (*finish)(BlockJob *, Error **errp), + Error **errp) +{ + Error *local_err = NULL; + int ret; + + assert(blk_bs(job->blk)->job == job); + + block_job_ref(job); + + if (finish) { + finish(job, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + block_job_unref(job); + return -EBUSY; + } + /* block_job_drain calls block_job_enter, and it should be enough to + * induce progress until the job completes or moves to the main thread. + */ + while (!job->deferred_to_main_loop && !job->completed) { + block_job_drain(job); + } + while (!job->completed) { + aio_poll(qemu_get_aio_context(), true); + } + ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; + block_job_unref(job); + return ret; +} + static void block_job_completed_txn_abort(BlockJob *job) { AioContext *ctx; BlockJobTxn *txn = job->txn; - BlockJob *other_job, *next; + BlockJob *other_job; if (txn->aborting) { /* @@ -367,29 +398,34 @@ static void block_job_completed_txn_abort(BlockJob *job) return; } txn->aborting = true; + block_job_txn_ref(txn); + /* We are the first failed job. Cancel other jobs. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); } + + /* Other jobs are effectively cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (other_job == job || other_job->completed) { - /* Other jobs are "effectively" cancelled by us, set the status for - * them; this job, however, may or may not be cancelled, depending - * on the caller, so leave it. */ - if (other_job != job) { - other_job->cancelled = true; - } - continue; + if (other_job != job) { + block_job_cancel_async(other_job); } - block_job_cancel_sync(other_job); - assert(other_job->completed); } - QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + while (!QLIST_EMPTY(&txn->jobs)) { + other_job = QLIST_FIRST(&txn->jobs); ctx = blk_get_aio_context(other_job->blk); + if (!other_job->completed) { + assert(other_job->cancelled); + block_job_finish_sync(other_job, NULL, NULL); + } block_job_completed_single(other_job); aio_context_release(ctx); } + + block_job_txn_unref(txn); } static void block_job_completed_txn_success(BlockJob *job) @@ -416,21 +452,6 @@ static void block_job_completed_txn_success(BlockJob *job) } } -void block_job_completed(BlockJob *job, int ret) -{ - assert(blk_bs(job->blk)->job == job); - assert(!job->completed); - job->completed = true; - job->ret = ret; - if (!job->txn) { - block_job_completed_single(job); - } else if (ret < 0 || block_job_is_cancelled(job)) { - block_job_completed_txn_abort(job); - } else { - block_job_completed_txn_success(job); - } -} - void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) { Error *local_err = NULL; @@ -462,135 +483,36 @@ void block_job_complete(BlockJob *job, Error **errp) job->driver->complete(job, errp); } -void block_job_pause(BlockJob *job) -{ - job->pause_count++; -} - void block_job_user_pause(BlockJob *job) { job->user_paused = true; block_job_pause(job); } -static bool block_job_should_pause(BlockJob *job) -{ - return job->pause_count > 0; -} - bool block_job_user_paused(BlockJob *job) { - return job ? job->user_paused : 0; -} - -void coroutine_fn block_job_pause_point(BlockJob *job) -{ - assert(job && block_job_started(job)); - - if (!block_job_should_pause(job)) { - return; - } - if (block_job_is_cancelled(job)) { - return; - } - - if (job->driver->pause) { - job->driver->pause(job); - } - - if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { - job->paused = true; - job->busy = false; - qemu_coroutine_yield(); /* wait for block_job_resume() */ - job->busy = true; - job->paused = false; - } - - if (job->driver->resume) { - job->driver->resume(job); - } -} - -void block_job_resume(BlockJob *job) -{ - assert(job->pause_count > 0); - job->pause_count--; - if (job->pause_count) { - return; - } - block_job_enter(job); + return job->user_paused; } void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { + block_job_iostatus_reset(job); job->user_paused = false; block_job_resume(job); } } -void block_job_enter(BlockJob *job) -{ - if (job->co && !job->busy) { - bdrv_coroutine_enter(blk_bs(job->blk), job->co); - } -} - void block_job_cancel(BlockJob *job) { if (block_job_started(job)) { - job->cancelled = true; - block_job_iostatus_reset(job); + block_job_cancel_async(job); block_job_enter(job); } else { block_job_completed(job, -ECANCELED); } } -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - -void block_job_iostatus_reset(BlockJob *job) -{ - job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; - if (job->driver->iostatus_reset) { - job->driver->iostatus_reset(job); - } -} - -static int block_job_finish_sync(BlockJob *job, - void (*finish)(BlockJob *, Error **errp), - Error **errp) -{ - Error *local_err = NULL; - int ret; - - assert(blk_bs(job->blk)->job == job); - - block_job_ref(job); - - finish(job, &local_err); - if (local_err) { - error_propagate(errp, local_err); - block_job_unref(job); - return -EBUSY; - } - /* block_job_drain calls block_job_enter, and it should be enough to - * induce progress until the job completes or moves to the main thread. - */ - while (!job->deferred_to_main_loop && !job->completed) { - block_job_drain(job); - } - while (!job->completed) { - aio_poll(qemu_get_aio_context(), true); - } - ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; - block_job_unref(job); - return ret; -} - /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be * used with block_job_finish_sync() without the need for (rather nasty) * function pointer casts there. */ @@ -622,42 +544,6 @@ int block_job_complete_sync(BlockJob *job, Error **errp) return block_job_finish_sync(job, &block_job_complete, errp); } -void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); - } - job->busy = true; - - block_job_pause_point(job); -} - -void block_job_yield(BlockJob *job) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - qemu_coroutine_yield(); - } - job->busy = true; - - block_job_pause_point(job); -} - BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { BlockJobInfo *info; @@ -717,6 +603,236 @@ static void block_job_event_completed(BlockJob *job, const char *msg) &error_abort); } +/* + * API for block job drivers and the block layer. These functions are + * declared in blockjob_int.h. + */ + +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp) +{ + BlockBackend *blk; + BlockJob *job; + int ret; + + if (bs->job) { + error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + + if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); + if (!*job_id) { + error_setg(errp, "An explicit job ID is required for this node"); + return NULL; + } + } + + if (job_id) { + if (flags & BLOCK_JOB_INTERNAL) { + error_setg(errp, "Cannot specify job ID for internal block job"); + return NULL; + } + + if (!id_wellformed(job_id)) { + error_setg(errp, "Invalid job ID '%s'", job_id); + return NULL; + } + + if (block_job_get(job_id)) { + error_setg(errp, "Job ID '%s' already in use", job_id); + return NULL; + } + } + + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } + + job = g_malloc0(driver->instance_size); + job->driver = driver; + job->id = g_strdup(job_id); + job->blk = blk; + job->cb = cb; + job->opaque = opaque; + job->busy = false; + job->paused = true; + job->pause_count = 1; + job->refcnt = 1; + + error_setg(&job->blocker, "block device is in use by block job: %s", + BlockJobType_lookup[driver->job_type]); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); + bs->job = job; + + blk_set_dev_ops(blk, &block_job_dev_ops, job); + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + + QLIST_INSERT_HEAD(&block_jobs, job, job_list); + + blk_add_aio_context_notifier(blk, block_job_attached_aio_context, + block_job_detach_aio_context, job); + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (local_err) { + block_job_unref(job); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + +void block_job_pause_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_pause(job); + aio_context_release(aio_context); + } +} + +void block_job_early_fail(BlockJob *job) +{ + block_job_unref(job); +} + +void block_job_completed(BlockJob *job, int ret) +{ + assert(blk_bs(job->blk)->job == job); + assert(!job->completed); + job->completed = true; + job->ret = ret; + if (!job->txn) { + block_job_completed_single(job); + } else if (ret < 0 || block_job_is_cancelled(job)) { + block_job_completed_txn_abort(job); + } else { + block_job_completed_txn_success(job); + } +} + +static bool block_job_should_pause(BlockJob *job) +{ + return job->pause_count > 0; +} + +void coroutine_fn block_job_pause_point(BlockJob *job) +{ + assert(job && block_job_started(job)); + + if (!block_job_should_pause(job)) { + return; + } + if (block_job_is_cancelled(job)) { + return; + } + + if (job->driver->pause) { + job->driver->pause(job); + } + + if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { + job->paused = true; + job->busy = false; + qemu_coroutine_yield(); /* wait for block_job_resume() */ + job->busy = true; + job->paused = false; + } + + if (job->driver->resume) { + job->driver->resume(job); + } +} + +void block_job_resume_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_resume(job); + aio_context_release(aio_context); + } +} + +void block_job_enter(BlockJob *job) +{ + if (!block_job_started(job)) { + return; + } + if (job->deferred_to_main_loop) { + return; + } + + if (!job->busy) { + bdrv_coroutine_enter(blk_bs(job->blk), job->co); + } +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); + } + job->busy = true; + + block_job_pause_point(job); +} + +void block_job_yield(BlockJob *job) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + qemu_coroutine_yield(); + } + job->busy = true; + + block_job_pause_point(job); +} + +void block_job_iostatus_reset(BlockJob *job) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + return; + } + assert(job->user_paused && job->pause_count > 0); + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + void block_job_event_ready(BlockJob *job) { job->ready = true; @@ -790,7 +906,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque) aio_context_acquire(aio_context); } - data->job->deferred_to_main_loop = false; data->fn(data->job, data->opaque); if (aio_context != data->aio_context) { @@ -816,36 +931,3 @@ void block_job_defer_to_main_loop(BlockJob *job, aio_bh_schedule_oneshot(qemu_get_aio_context(), block_job_defer_to_main_loop_bh, data); } - -BlockJobTxn *block_job_txn_new(void) -{ - BlockJobTxn *txn = g_new0(BlockJobTxn, 1); - QLIST_INIT(&txn->jobs); - txn->refcnt = 1; - return txn; -} - -static void block_job_txn_ref(BlockJobTxn *txn) -{ - txn->refcnt++; -} - -void block_job_txn_unref(BlockJobTxn *txn) -{ - if (txn && --txn->refcnt == 0) { - g_free(txn); - } -} - -void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) -{ - if (!txn) { - return; - } - - assert(!job->txn); - job->txn = txn; - - QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); - block_job_txn_ref(txn); -} diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 60dc651..3bbeb62 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -7,6 +7,7 @@ CONFIG_USB_UHCI=y CONFIG_USB_OHCI=y CONFIG_USB_EHCI=y CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y CONFIG_NE2000_PCI=y CONFIG_EEPRO100_PCI=y CONFIG_PCNET_PCI=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 9615a48..18aed56 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -5,4 +5,5 @@ CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y CONFIG_S390_FLIC=y CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) +CONFIG_VFIO_CCW=$(CONFIG_LINUX) CONFIG_WDT_DIAG288=y @@ -486,7 +486,8 @@ static MemoryRegionSection address_space_do_translate(AddressSpace *as, break; } - iotlb = mr->iommu_ops->translate(mr, addr, is_write); + iotlb = mr->iommu_ops->translate(mr, addr, is_write ? + IOMMU_WO : IOMMU_RO); addr = ((iotlb.translated_addr & ~iotlb.addr_mask) | (addr & iotlb.addr_mask)); *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1); diff --git a/hmp-commands.hx b/hmp-commands.hx index baeac47..e763606 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -676,7 +676,8 @@ ETEXI STEXI @item usb_add @var{devname} @findex usb_add -Add the USB device @var{devname}. For details of available devices see +Add the USB device @var{devname}. This command is deprecated, please +use @code{device_add} instead. For details of available devices see @ref{usb_devices} ETEXI @@ -693,7 +694,8 @@ STEXI @findex usb_del Remove the USB device @var{devname} from the QEMU virtual USB hub. @var{devname} has the syntax @code{bus.addr}. Use the monitor -command @code{info usb} to see the devices you can remove. +command @code{info usb} to see the devices you can remove. This +command is deprecated, please use @code{device_del} instead. ETEXI { diff --git a/hw/acpi/core.c b/hw/acpi/core.c index e890a5d..95fcac9 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -561,7 +561,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) uint16_t sus_typ = (val >> 10) & 7; switch(sus_typ) { case 0: /* soft power off */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case 1: qemu_system_suspend_request(); @@ -569,7 +569,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) default: if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ qapi_event_send_suspend_disk(&error_abort); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; } diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index f50f5cf..c1cf780 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -664,7 +664,7 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr, /* TODO: A translation failure here ought to set PCI error codes on the Pchip and generate a machine check interrupt. */ static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); IOMMUTLBEntry ret; diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index 0a4508c..d209b97 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -108,9 +108,9 @@ static void hb_regs_write(void *opaque, hwaddr offset, if (offset == 0xf00) { if (value == 1 || value == 2) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (value == 3) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c index 5610ffc..ca3eca1 100644 --- a/hw/arm/integratorcp.c +++ b/hw/arm/integratorcp.c @@ -158,7 +158,7 @@ static void integratorcm_do_remap(IntegratorCMState *s) static void integratorcm_set_ctrl(IntegratorCMState *s, uint32_t value) { if (value & 8) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } if ((s->cm_ctrl ^ value) & 1) { /* (value & 1) != 0 means the green "MISC LED" is lit. diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index cbbca4e..9c710f7 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -898,7 +898,7 @@ static void mv88w8618_pit_write(void *opaque, hwaddr offset, case MP_BOARD_RESET: if (value == MP_BOARD_RESET_MAGIC) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index b3cf0ec..54582bd 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -355,7 +355,7 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr, /* XXX: on T|E hardware somehow this has no effect, * on Zire 71 it works as specified. */ s->reset = 1; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } s->last_wr = value & 0xff; @@ -1545,8 +1545,10 @@ static inline void omap_clkm_idlect1_update(struct omap_mpu_state_s *s, if (value & (1 << 11)) { /* SETARM_IDLE */ cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_HALT); } - if (!(value & (1 << 10))) /* WKUP_MODE */ - qemu_system_shutdown_request(); /* XXX: disable wakeup from IRQ */ + if (!(value & (1 << 10))) { /* WKUP_MODE */ + /* XXX: disable wakeup from IRQ */ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } #define SET_CANIDLE(clock, bit) \ if (diff & (1 << bit)) { \ @@ -1693,7 +1695,7 @@ static void omap_clkm_write(void *opaque, hwaddr addr, diff = s->clkm.arm_rstct1 ^ value; s->clkm.arm_rstct1 = value & 0x0007; if (value & 9) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); s->clkm.cold_start = 0xa; } if (diff & ~value & 4) { /* DSP_RST */ diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index cf1b4ba..8afb854 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -1610,7 +1610,7 @@ static void omap_prcm_write(void *opaque, hwaddr addr, case 0x450: /* RM_RSTCTRL_WKUP */ /* TODO: reset */ if (value & 2) - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case 0x454: /* RM_RSTTIME_WKUP */ s->rsttime_wkup = value & 0x1fff; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 3246268..93bde14 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -848,7 +848,7 @@ static void spitz_lcd_hsync_handler(void *opaque, int line, int level) static void spitz_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index ea7a809..cf6e7be 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -1197,7 +1197,7 @@ static void do_sys_reset(void *opaque, int n, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 9f58a23..2421b81 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -90,7 +90,7 @@ static void tosa_out_switch(void *opaque, int line, int level) static void tosa_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/core/machine.c b/hw/core/machine.c index fd6a436..3adebf1 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -21,6 +21,7 @@ #include "qemu/error-report.h" #include "qemu/cutils.h" #include "sysemu/numa.h" +#include "sysemu/qtest.h" static char *machine_get_accel(Object *obj, Error **errp) { @@ -722,7 +723,7 @@ static void machine_numa_validate(MachineState *machine) g_free(cpu_str); } } - if (s->len) { + if (s->len && !qtest_enabled()) { error_report("warning: CPU(s) not present in any NUMA nodes: %s", s->str); error_report("warning: All CPU(s) up to maxcpus should be described " diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c index 0080141..edf9432 100644 --- a/hw/dma/rc4030.c +++ b/hw/dma/rc4030.c @@ -489,7 +489,7 @@ static const MemoryRegionOps jazzio_ops = { }; static IOMMUTLBEntry rc4030_dma_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { rc4030State *s = container_of(iommu, rc4030State, dma_mr); IOMMUTLBEntry ret = { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index afcadac..82bd44f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2404,14 +2404,17 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } /* - * Entry is required for Windows to enable memory hotplug in OS. + * Entry is required for Windows to enable memory hotplug in OS + * and for Linux to enable SWIOTLB when booted with less than + * 4G of RAM. Windows works better if the entry sets proximity + * to the highest NUMA node in the machine. * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ if (hotplugabble_address_space_size) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, 0, + hotplugabble_address_space_size, pcms->numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 329058d..7b6d4ea 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr) } static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); AMDVIState *s = as->iommu_state; @@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, return ret; } - amdvi_do_translate(as, addr, is_write, &ret); + amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, ret.translated_addr); return ret; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9ba2162..15610b9 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -512,7 +512,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, return 0; } -static inline bool vtd_context_entry_present(VTDContextEntry *context) +static inline bool vtd_ce_present(VTDContextEntry *context) { return context->lo & VTD_CONTEXT_ENTRY_P; } @@ -533,7 +533,7 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, return 0; } -static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) +static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) { return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } @@ -585,19 +585,49 @@ static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) /* Get the page-table level that hardware should use for the second-level * page-table walk from the Address Width field of context-entry. */ -static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) { return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); } -static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) { return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; } +static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce) +{ + return ce->lo & VTD_CONTEXT_ENTRY_TT; +} + +/* Return true if check passed, otherwise false */ +static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, + VTDContextEntry *ce) +{ + switch (vtd_ce_get_type(ce)) { + case VTD_CONTEXT_TT_MULTI_LEVEL: + /* Always supported */ + break; + case VTD_CONTEXT_TT_DEV_IOTLB: + if (!x86_iommu->dt_supported) { + return false; + } + break; + case VTD_CONTEXT_TT_PASS_THROUGH: + if (!x86_iommu->pt_supported) { + return false; + } + break; + default: + /* Unknwon type */ + return false; + } + return true; +} + static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) { - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + uint32_t ce_agaw = vtd_ce_get_agaw(ce); return 1ULL << MIN(ce_agaw, VTD_MGAW); } @@ -635,6 +665,29 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) } } +/* Find the VTD address space associated with a given bus number */ +static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) +{ + VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; + if (!vtd_bus) { + /* + * Iterate over the registered buses to find the one which + * currently hold this bus number, and update the bus_num + * lookup table: + */ + GHashTableIter iter; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + if (pci_bus_num(vtd_bus->bus) == bus_num) { + s->vtd_as_by_bus_num[bus_num] = vtd_bus; + return vtd_bus; + } + } + } + return vtd_bus; +} + /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. */ @@ -642,8 +695,8 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, bool *reads, bool *writes) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); uint32_t offset; uint64_t slpte; uint64_t access_right_check; @@ -664,7 +717,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " "entry at level %"PRIu32 " for iova 0x%"PRIx64, level, iova); - if (level == vtd_get_level_from_context_entry(ce)) { + if (level == vtd_ce_get_level(ce)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -809,8 +862,8 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, bool notify_unmap) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); if (!vtd_iova_range_check(start, ce)) { return -VTD_FR_ADDR_BEYOND_MGAW; @@ -831,6 +884,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, { VTDRootEntry re; int ret_fr; + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); ret_fr = vtd_get_root_entry(s, bus_num, &re); if (ret_fr) { @@ -841,7 +895,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Not error - it's okay we don't have root entry. */ trace_vtd_re_not_present(bus_num); return -VTD_FR_ROOT_ENTRY_P; - } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + } + + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -851,31 +907,116 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return ret_fr; } - if (!vtd_context_entry_present(ce)) { + if (!vtd_ce_present(ce)) { /* Not error - it's okay we don't have context entry. */ trace_vtd_ce_not_present(bus_num, devfn); return -VTD_FR_CONTEXT_ENTRY_P; - } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + } + + if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } + /* Check if the programming of context-entry is valid */ - if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { + if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + trace_vtd_ce_invalid(ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_INV; + } + + /* Do translation type check */ + if (!vtd_ce_type_check(x86_iommu, ce)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_INV; + } + + return 0; +} + +/* + * Fetch translation type for specific device. Returns <0 if error + * happens, otherwise return the shifted type to check against + * VTD_CONTEXT_TT_*. + */ +static int vtd_dev_get_trans_type(VTDAddressSpace *as) +{ + IntelIOMMUState *s; + VTDContextEntry ce; + int ret; + + s = as->iommu_state; + + ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), + as->devfn, &ce); + if (ret) { + return ret; + } + + return vtd_ce_get_type(&ce); +} + +static bool vtd_dev_pt_enabled(VTDAddressSpace *as) +{ + int ret; + + assert(as); + + ret = vtd_dev_get_trans_type(as); + if (ret < 0) { + /* + * Possibly failed to parse the context entry for some reason + * (e.g., during init, or any guest configuration errors on + * context entries). We should assume PT not enabled for + * safety. + */ + return false; + } + + return ret == VTD_CONTEXT_TT_PASS_THROUGH; +} + +/* Return whether the device is using IOMMU translation. */ +static bool vtd_switch_address_space(VTDAddressSpace *as) +{ + bool use_iommu; + + assert(as); + + use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as); + + trace_vtd_switch_address_space(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + use_iommu); + + /* Turn off first then on the other */ + if (use_iommu) { + memory_region_set_enabled(&as->sys_alias, false); + memory_region_set_enabled(&as->iommu, true); } else { - switch (ce->lo & VTD_CONTEXT_ENTRY_TT) { - case VTD_CONTEXT_TT_MULTI_LEVEL: - /* fall through */ - case VTD_CONTEXT_TT_DEV_IOTLB: - break; - default: - trace_vtd_ce_invalid(ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_INV; + memory_region_set_enabled(&as->iommu, false); + memory_region_set_enabled(&as->sys_alias, true); + } + + return use_iommu; +} + +static void vtd_switch_address_space_all(IntelIOMMUState *s) +{ + GHashTableIter iter; + VTDBus *vtd_bus; + int i; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { + if (!vtd_bus->dev_as[i]) { + continue; + } + vtd_switch_address_space(vtd_bus->dev_as[i]); } } - return 0; } static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) @@ -915,6 +1056,31 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) +{ + VTDBus *vtd_bus; + VTDAddressSpace *vtd_as; + bool success = false; + + vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); + if (!vtd_bus) { + goto out; + } + + vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + if (!vtd_as) { + goto out; + } + + if (vtd_switch_address_space(vtd_as) == false) { + /* We switched off IOMMU region successfully. */ + success = true; + } + +out: + trace_vtd_pt_enable_fast_path(source_id, success); +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. * @@ -986,6 +1152,30 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + /* + * We don't need to translate for pass-through context entries. + * Also, let's ignore IOTLB caching as well for PT devices. + */ + if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) { + entry->translated_addr = entry->iova; + entry->addr_mask = VTD_PAGE_SIZE - 1; + entry->perm = IOMMU_RW; + trace_vtd_translate_pt(source_id, entry->iova); + + /* + * When this happens, it means firstly caching-mode is not + * enabled, and this is the first passthrough translation for + * the device. Let's enable the fast path for passthrough. + * + * When passthrough is disabled again for the device, we can + * capture it via the context entry invalidation, then the + * IOMMU region can be swapped back. + */ + vtd_pt_enable_fast_path(s, source_id); + + return; + } + ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, &reads, &writes); if (ret_fr) { @@ -1005,7 +1195,7 @@ out: entry->iova = addr & page_mask; entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; entry->addr_mask = ~page_mask; - entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0); + entry->perm = IOMMU_ACCESS_FLAG(reads, writes); } static void vtd_root_table_setup(IntelIOMMUState *s) @@ -1055,6 +1245,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { vtd_reset_context_cache(s); } + vtd_switch_address_space_all(s); /* * From VT-d spec 6.5.2.1, a global context entry invalidation * should be followed by a IOTLB global invalidation, so we should @@ -1065,29 +1256,6 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) vtd_iommu_replay_all(s); } - -/* Find the VTD address space currently associated with a given bus number, - */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - if (!vtd_bus) { - /* Iterate over the registered buses to find the one - * which currently hold this bus number, and update the bus_num lookup table: - */ - GHashTableIter iter; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - } - return vtd_bus; -} - /* Do a context-cache device-selective invalidation. * @func_mask: FM field after shifting */ @@ -1130,6 +1298,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, VTD_PCI_FUNC(devfn_it)); vtd_as->context_cache_entry.context_cache_gen = 0; /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. + */ + vtd_switch_address_space(vtd_as); + /* * So a device is moving out of (or moving into) a * domain, a replay() suites here to notify all the * IOMMU_NOTIFIER_MAP registers about this change. @@ -1361,42 +1534,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); } -static void vtd_switch_address_space(VTDAddressSpace *as) -{ - assert(as); - - trace_vtd_switch_address_space(pci_bus_num(as->bus), - VTD_PCI_SLOT(as->devfn), - VTD_PCI_FUNC(as->devfn), - as->iommu_state->dmar_enabled); - - /* Turn off first then on the other */ - if (as->iommu_state->dmar_enabled) { - memory_region_set_enabled(&as->sys_alias, false); - memory_region_set_enabled(&as->iommu, true); - } else { - memory_region_set_enabled(&as->iommu, false); - memory_region_set_enabled(&as->sys_alias, true); - } -} - -static void vtd_switch_address_space_all(IntelIOMMUState *s) -{ - GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} - /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { @@ -2221,7 +2358,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, } static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; @@ -2243,7 +2380,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, } vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, - is_write, &ret); + flag & IOMMU_WO, &ret); VTD_DPRINTF(MMU, "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), @@ -2844,6 +2981,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_DT; } + if (x86_iommu->pt_supported) { + s->ecap |= VTD_ECAP_PT; + } + if (s->caching_mode) { s->cap |= VTD_CAP_CM; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 29d6707..0e73a65 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -187,6 +187,7 @@ /* Interrupt Remapping support */ #define VTD_ECAP_IR (1ULL << 3) #define VTD_ECAP_EIM (1ULL << 4) +#define VTD_ECAP_PT (1ULL << 6) #define VTD_ECAP_MHMV (15ULL << 20) /* CAP_REG */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 816bfa8..107a341 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -519,7 +519,7 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val, s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if ((val & 1) && !(oldval & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 04a6980..72556da 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -38,6 +38,8 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 +vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64 +vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d" # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 23dcd3f..293caf8 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -88,55 +88,23 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } +static Property x86_iommu_properties[] = { + DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false), + DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false), + DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void x86_iommu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = x86_iommu_realize; -} - -static bool x86_iommu_intremap_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->intr_supported; -} - -static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->intr_supported = value; -} - -static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->dt_supported; -} - -static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->dt_supported = value; -} - -static void x86_iommu_instance_init(Object *o) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - - /* By default, do not support IR */ - s->intr_supported = false; - object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get, - x86_iommu_intremap_prop_set, NULL); - s->dt_supported = false; - object_property_add_bool(o, "device-iotlb", - x86_iommu_device_iotlb_prop_get, - x86_iommu_device_iotlb_prop_set, - NULL); + dc->props = x86_iommu_properties; } static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, - .instance_init = x86_iommu_instance_init, .instance_size = sizeof(X86IOMMUState), .class_init = x86_iommu_class_init, .class_size = sizeof(X86IOMMUClass), diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index b1c05ff..919f09b 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1089,11 +1089,14 @@ static void cpu_handle_ioreq(void *opaque) * causes Xen to powerdown the domain. */ if (runstate_is_running()) { + ShutdownCause request; + if (qemu_shutdown_requested_get()) { destroy_hvm_domain(false); } - if (qemu_reset_requested_get()) { - qemu_system_reset(VMRESET_REPORT); + request = qemu_reset_requested_get(); + if (request) { + qemu_system_reset(request); destroy_hvm_domain(true); } } @@ -1395,7 +1398,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...) va_end(ap); fprintf(stderr, "Will destroy the domain.\n"); /* destroy the domain */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); } void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index d414288..c479f82 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -226,7 +226,7 @@ static void outport_write(KBDState *s, uint32_t val) s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if (!(val & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } @@ -301,7 +301,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, s->outport &= ~KBD_OUT_A20; break; case KBD_CCMD_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case KBD_CCMD_NO_OP: /* ignore that */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 292fffe..ea35167 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -357,6 +357,10 @@ static void icp_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_reset, dev); } +static void icp_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_reset, dev); +} static void icp_class_init(ObjectClass *klass, void *data) { @@ -364,6 +368,7 @@ static void icp_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; + dc->unrealize = icp_unrealize; } static const TypeInfo icp_info = { diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index dd93531..14b8f6f 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -42,6 +42,14 @@ static int kernel_xics_fd = -1; +typedef struct KVMEnabledICP { + unsigned long vcpu_id; + QLIST_ENTRY(KVMEnabledICP) node; +} KVMEnabledICP; + +static QLIST_HEAD(, KVMEnabledICP) + kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); + /* * ICP-KVM */ @@ -121,6 +129,8 @@ static void icp_kvm_reset(void *dev) static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); + KVMEnabledICP *enabled_icp; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); int ret; if (kernel_xics_fd == -1) { @@ -132,18 +142,21 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) * which was hot-removed earlier we don't have to renable * KVM_CAP_IRQ_XICS capability again. */ - if (icp->cap_irq_xics_enabled) { - return; + QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) { + if (enabled_icp->vcpu_id == vcpu_id) { + return; + } } - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, - kvm_arch_vcpu_id(cs)); + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id); if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); + error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id, + strerror(errno)); exit(1); } - icp->cap_irq_xics_enabled = true; + enabled_icp = g_malloc(sizeof(*enabled_icp)); + enabled_icp->vcpu_id = vcpu_id; + QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node); } static void icp_kvm_realize(DeviceState *dev, Error **errp) @@ -151,12 +164,18 @@ static void icp_kvm_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_kvm_reset, dev); } +static void icp_kvm_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_kvm_reset, dev); +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICPStateClass *icpc = ICP_CLASS(klass); dc->realize = icp_kvm_realize; + dc->unrealize = icp_kvm_unrealize; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; icpc->cpu_setup = icp_kvm_cpu_setup; diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index f05308b8..d98ea8b 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -229,7 +229,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) +void xics_spapr_init(sPAPRMachineState *spapr) { /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); @@ -243,7 +243,6 @@ int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - return 0; } #define ICS_IRQ_FREE(ics, srcno) \ diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c index 5cf1caa..afafe14 100644 --- a/hw/ipmi/ipmi.c +++ b/hw/ipmi/ipmi.c @@ -44,14 +44,14 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) if (checkonly) { return 0; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return 0; case IPMI_POWEROFF_CHASSIS: if (checkonly) { return 0; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return 0; case IPMI_SEND_NMI: diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index e2215dc..ac8416d 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -606,7 +606,7 @@ static void ich9_rst_cnt_write(void *opaque, hwaddr addr, uint64_t val, ICH9LPCState *lpc = opaque; if (val & 4) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } lpc->rst_cnt = val & 0xA; /* keep FULL_RST (bit 3) and SYS_RST (bit 1) */ diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 83f7b82..53d1e0c 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -232,7 +232,7 @@ static void boston_platreg_write(void *opaque, hwaddr addr, break; case PLAT_SOFTRST_CTL: if (val & PLAT_SOFTRST_CTL_SYSRESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 5dd177e..7814c39 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -470,7 +470,7 @@ static void malta_fpga_write(void *opaque, hwaddr addr, /* SOFTRES Register */ case 0x00500: if (val == 0x42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; /* BRKRES Register */ diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c index 748586e..f4de9fc 100644 --- a/hw/mips/mips_r4k.c +++ b/hw/mips/mips_r4k.c @@ -53,9 +53,9 @@ static void mips_qemu_write (void *opaque, hwaddr addr, uint64_t val, unsigned size) { if ((addr & 0xffff) == 0 && val == 42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); else if ((addr & 0xffff) == 4 && val == 42) - qemu_system_shutdown_request (); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static uint64_t mips_qemu_read (void *opaque, hwaddr addr, diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c index 8524008..b20b44e 100644 --- a/hw/misc/arm_sysctl.c +++ b/hw/misc/arm_sysctl.c @@ -351,13 +351,13 @@ static bool vexpress_cfgctrl_write(arm_sysctl_state *s, unsigned int dcc, break; case SYS_CFG_SHUTDOWN: if (site == SYS_CFG_SITE_MB && device == 0) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } break; case SYS_CFG_REBOOT: if (site == SYS_CFG_SITE_MB && device == 0) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } break; @@ -429,7 +429,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x100) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; @@ -438,7 +438,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x04) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/misc/cbus.c b/hw/misc/cbus.c index 0c207e3..677274c 100644 --- a/hw/misc/cbus.c +++ b/hw/misc/cbus.c @@ -356,7 +356,7 @@ static inline void retu_write(CBusRetu *s, int reg, uint16_t val) case RETU_REG_WATCHDOG: if (val == 0 && (s->cc[0] & 2)) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case RETU_REG_TXCR: diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index 05c02fb..008d8bd 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -612,7 +612,7 @@ static bool cuda_cmd_powerdown(CUDAState *s, return false; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } @@ -624,7 +624,7 @@ static bool cuda_cmd_reset_system(CUDAState *s, return false; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } diff --git a/hw/misc/slavio_misc.c b/hw/misc/slavio_misc.c index edd5de0..18ff677 100644 --- a/hw/misc/slavio_misc.c +++ b/hw/misc/slavio_misc.c @@ -258,7 +258,7 @@ static void slavio_aux2_mem_writeb(void *opaque, hwaddr addr, val &= AUX2_PWROFF; s->aux2 = val; if (val & AUX2_PWROFF) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); slavio_misc_update_irq(s); } @@ -338,7 +338,7 @@ static void slavio_sysctrl_mem_writel(void *opaque, hwaddr addr, case 0: if (val & SYS_RESET) { s->sysctrl = SYS_RESETSTAT; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c index 7891219..44304d4 100644 --- a/hw/misc/zynq_slcr.c +++ b/hw/misc/zynq_slcr.c @@ -405,7 +405,7 @@ static void zynq_slcr_write(void *opaque, hwaddr offset, switch (offset) { case PSS_RST_CTRL: if (val & R_PSS_RST_CTRL_SOFT_RST) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 98bd683..9a3d769 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -589,7 +589,15 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, if (!get_vhost_net(nc->peer)) { return features; } - return vhost_net_get_features(get_vhost_net(nc->peer), features); + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + return features; } static uint64_t virtio_net_bad_features(VirtIODevice *vdev) @@ -640,6 +648,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) VirtIONet *n = VIRTIO_NET(vdev); int i; + if (n->mtu_bypass_backend && + !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { + features &= ~(1ULL << VIRTIO_NET_F_MTU); + } + virtio_net_set_multiqueue(n, virtio_has_feature(features, VIRTIO_NET_F_MQ)); @@ -2093,6 +2106,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index edc88f4..326f5ef 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -209,7 +209,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) /* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { IOMMUState *is = container_of(iommu, IOMMUState, iommu); hwaddr baseaddr, offset; @@ -482,9 +482,9 @@ static void apb_config_writel (void *opaque, hwaddr addr, s->reset_control |= val & RESET_WMASK; if (val & SOFT_POR) { s->nr_resets = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (val & SOFT_XIR) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 85a3bb0..89133a9 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -269,7 +269,7 @@ static void bonito_writel(void *opaque, hwaddr addr, } s->regs[saddr] = val; if (reset) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case BONITO_INTENSET: diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 2d02de1..4ce201e 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -632,7 +632,7 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) PIIX3State *d = opaque; if (val & 4) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } d->rcr = val & 2; /* keep System Reset type only */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index f7df238..62f1857 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -774,7 +774,7 @@ static qemu_irq *ppce500_init_mpic(MachineState *machine, PPCE500Params *params, static void ppce500_power_off(void *opaque, int line, int on) { if (on) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c index ba69178..ce1254b 100644 --- a/hw/ppc/mpc8544_guts.c +++ b/hw/ppc/mpc8544_guts.c @@ -98,7 +98,7 @@ static void mpc8544_guts_write(void *opaque, hwaddr addr, switch (addr) { case MPC8544_GUTS_ADDR_RSTCR: if (value & MPC8544_GUTS_RSTCR_RESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 5f93083..224184d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -412,7 +412,7 @@ static void ppce500_set_irq(void *opaque, int pin, int level) if (level) { LOG_IRQ("%s: reset the PowerPC system\n", __func__); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index d5df94a..fc32e96 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -1807,7 +1807,7 @@ void ppc40x_chip_reset(PowerPCCPU *cpu) void ppc40x_system_reset(PowerPCCPU *cpu) { printf("Reset PowerPC system\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void store_40x_dbcr0 (CPUPPCState *env, uint32_t val) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0980d73..ab3aab1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -101,21 +101,26 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) { - Error *err = NULL, *local_err = NULL; + Error *local_err = NULL; Object *obj; obj = object_new(type_ics); - object_property_add_child(OBJECT(spapr), "ics", obj, NULL); + object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); - object_property_set_int(obj, nr_irqs, "nr-irqs", &err); + object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err); + if (local_err) { + goto error; + } object_property_set_bool(obj, true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return NULL; + if (local_err) { + goto error; } return ICS_SIMPLE(obj); + +error: + error_propagate(errp, local_err); + return NULL; } static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) @@ -123,25 +128,24 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); if (kvm_enabled()) { - Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, errp)) { spapr->icp_type = TYPE_KVM_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err); + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp); } if (machine_kernel_irqchip_required(machine) && !spapr->ics) { - error_reportf_err(err, - "kernel_irqchip requested but unavailable: "); - } else { - error_free(err); + error_prepend(errp, "kernel_irqchip requested but unavailable: "); + return; } } if (!spapr->ics) { - xics_spapr_init(spapr, errp); + xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); + if (!spapr->ics) { + return; + } } } @@ -1222,16 +1226,21 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) return shift; } +void spapr_free_hpt(sPAPRMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, Error **errp) { long rc; /* Clean up any HPT info from a previous boot */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); rc = kvmppc_reset_htab(shift); if (rc < 0) { @@ -2050,6 +2059,7 @@ static void ppc_spapr_init(MachineState *machine) msi_nonbroken = true; QLIST_INIT(&spapr->phbs); + QTAILQ_INIT(&spapr->pending_dimm_unplugs); /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2569,20 +2579,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, uint64_t align = memory_region_get_alignment(mr); uint64_t size = memory_region_size(mr); uint64_t addr; - char *mem_dev; - - if (size % SPAPR_MEMORY_BLOCK_SIZE) { - error_setg(&local_err, "Hotplugged memory size must be a multiple of " - "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); - goto out; - } - - mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); - if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { - error_setg(&local_err, "Memory backend has bad page size. " - "Use 'memory-backend-file' with correct mem-path."); - goto out; - } pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { @@ -2603,56 +2599,121 @@ out: error_propagate(errp, local_err); } -typedef struct sPAPRDIMMState { +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + char *mem_dev; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); + if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { + error_setg(errp, "Memory backend has bad page size. " + "Use 'memory-backend-file' with correct mem-path."); + return; + } +} + +struct sPAPRDIMMState { + PCDIMMDevice *dimm; uint32_t nr_lmbs; -} sPAPRDIMMState; + QTAILQ_ENTRY(sPAPRDIMMState) next; +}; -static void spapr_lmb_release(DeviceState *dev, void *opaque) +static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s, + PCDIMMDevice *dimm) { - sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; - HotplugHandler *hotplug_ctrl; + sPAPRDIMMState *dimm_state = NULL; - if (--ds->nr_lmbs) { - return; + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } } + return dimm_state; +} - g_free(ds); +static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm)); + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next); +} - /* - * Now that all the LMBs have been removed by the guest, call the - * pc-dimm unplug handler to cleanup up the pc-dimm device. - */ - hotplug_ctrl = qdev_get_hotplug_handler(dev); - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); } -static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, - Error **errp) +static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, + PCDIMMDevice *dimm) { sPAPRDRConnector *drc; - sPAPRDRConnectorClass *drck; + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; int i; - sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); - uint64_t addr = addr_start; + sPAPRDIMMState *ds; - ds->nr_lmbs = nr_lmbs; + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr / SPAPR_MEMORY_BLOCK_SIZE); + addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_lmb_release, ds, errp); + if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) { + avail_lmbs++; + } addr += SPAPR_MEMORY_BLOCK_SIZE; } - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr_start / SPAPR_MEMORY_BLOCK_SIZE); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, - nr_lmbs, - drck->get_index(drc)); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = avail_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(ms, ds); + return ds; +} + +/* Callback to be called during DRC release. */ +void spapr_lmb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + if (ds->nr_lmbs) { + return; + } + } else if (--ds->nr_lmbs) { + return; + } + + spapr_pending_dimm_unplugs_remove(spapr, ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -2670,19 +2731,47 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev); Error *local_err = NULL; PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); uint64_t size = memory_region_size(mr); - uint64_t addr; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr_start, addr; + int i; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + sPAPRDIMMState *ds; - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); if (local_err) { goto out; } - spapr_del_lmbs(dev, addr, size, &error_abort); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(spapr, ds); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); out: error_propagate(errp, local_err); } @@ -2715,11 +2804,13 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); } -static void spapr_core_release(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { HotplugHandler *hotplug_ctrl; @@ -2752,7 +2843,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + drck->detach(drc, dev, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -2853,7 +2944,13 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - if (cc->nr_threads != smp_threads) { + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. + */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); return; @@ -2990,7 +3087,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_pre_plug(hotplug_dev, dev, errp); } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index a17ea07..ff7058e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -143,29 +143,30 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) Object *obj; obj = object_new(spapr->icp_type); - object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_child(OBJECT(cpu), "icp", obj, &error_abort); + object_unref(obj); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - error_propagate(errp, local_err); - return; + goto error; } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } spapr_cpu_init(spapr, cpu, &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj)); + return; + +error: + object_unparent(obj); + error_propagate(errp, local_err); } static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9fa5545..cc2400b 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -20,6 +20,7 @@ #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ #include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" @@ -99,8 +100,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, if (drc->awaiting_release) { if (drc->configured) { trace_spapr_drc_set_isolation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else { trace_spapr_drc_set_isolation_state_deferring(get_index(drc)); } @@ -153,8 +153,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, if (drc->awaiting_release && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { trace_spapr_drc_set_allocation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { drc->awaiting_allocation = false; } @@ -404,15 +403,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -static void detach(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp) +static void detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) { trace_spapr_drc_detach(get_index(drc)); - drc->detach_cb = detach_cb; - drc->detach_cb_opaque = detach_cb_opaque; - /* if we've signalled device presence to the guest, or if the guest * has gone ahead and configured the device (via manually-executed * device add via drmgr in guest, namely), we need to wait @@ -456,8 +450,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; - if (drc->detach_cb) { - drc->detach_cb(drc->dev, drc->detach_cb_opaque); + /* Calling release callbacks based on drc->type. */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + spapr_core_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PCI: + spapr_phb_remove_pci_device_cb(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + spapr_lmb_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); } drc->awaiting_release = false; @@ -467,8 +474,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->fdt_start_offset = 0; object_property_del(OBJECT(drc), "device", NULL); drc->dev = NULL; - drc->detach_cb = NULL; - drc->detach_cb_opaque = NULL; } static bool release_pending(sPAPRDRConnector *drc) @@ -498,8 +503,7 @@ static void reset(DeviceState *d) * force removal if we are */ if (drc->awaiting_release) { - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } /* non-PCI devices may be awaiting a transition to UNUSABLE */ @@ -515,6 +519,60 @@ static void reset(DeviceState *d) } } +static bool spapr_drc_needed(void *opaque) +{ + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool rc = false; + sPAPRDREntitySense value; + drck->entity_sense(drc, &value); + + /* If no dev is plugged in there is no need to migrate the DRC state */ + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { + return false; + } + + /* + * If there is dev plugged in, we need to migrate the DRC state when + * it is different from cold-plugged state + */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + case SPAPR_DR_CONNECTOR_TYPE_LMB: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); + } + return rc; +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), + VMSTATE_BOOL(configured, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_BOOL(signalled, sPAPRDRConnector), + VMSTATE_END_OF_LIST() + } +}; + static void realize(DeviceState *d, Error **errp) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); @@ -543,6 +601,8 @@ static void realize(DeviceState *d, Error **errp) object_unref(OBJECT(drc)); } g_free(child_name); + vmstate_register(DEVICE(drc), drck->get_index(drc), &vmstate_spapr_drc, + drc); trace_spapr_drc_realize_complete(drck->get_index(drc)); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f0b28d8..73e2a18 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data, bool exception) +static void rtas_event_log_queue(int log_type, void *data) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); entry->log_type = log_type; - entry->exception = exception; entry->data = data; QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, - bool exception) +static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { break; } @@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, return entry; } -static bool rtas_event_log_contains(uint32_t event_mask, bool exception) +static bool rtas_event_log_contains(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { return true; } @@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask, true); + event = rtas_event_log_dequeue(mask); if (!event) { goto out_no_events; } @@ -690,7 +680,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * interrupts. */ for (i = 0; i < EVENT_CLASS_MAX; i++) { - if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { const sPAPREventSource *source = spapr_event_sources_get_source(spapr->event_sources, i); @@ -710,38 +700,10 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - uint32_t mask, buf, len, event_len; - sPAPREventLogEntry *event; - struct rtas_error_log *hdr; - if (nargs != 4 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - - mask = rtas_ld(args, 0); - buf = rtas_ld(args, 2); - len = rtas_ld(args, 3); - - event = rtas_event_log_dequeue(mask, false); - if (!event) { - goto out_no_events; - } - - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); - - if (event_len < len) { - len = event_len; - } - - cpu_physical_memory_write(buf, event->data, len); - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); - g_free(event); - return; - -out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0d608d6..aae5a62 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -913,10 +913,7 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, /* We assume RADIX, so this catches all the "Do Nothing" cases */ } else if (!(patbe_old & PATBE1_GR)) { /* HASH->RADIX : Free HPT */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); } else if (!(patbe_new & PATBE1_GR)) { /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ spapr_setup_hpt_and_vrma(spapr); @@ -1047,19 +1044,13 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - sPAPRMachineState *spapr, - target_ulong opcode, - target_ulong *args) +static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, + Error **errp) { - target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table; bool explicit_match = false; /* Matched the CPU's real PVR */ uint32_t max_compat = cpu->max_compat; uint32_t best_compat = 0; int i; - sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; - bool guest_radix; /* * We scan the supplied table of PVRs looking for two things @@ -1069,9 +1060,9 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, for (i = 0; i < 512; ++i) { uint32_t pvr, pvr_mask; - pvr_mask = ldl_be_phys(&address_space_memory, list); - pvr = ldl_be_phys(&address_space_memory, list + 4); - list += 8; + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; if (~pvr_mask & pvr) { break; /* Terminator record */ @@ -1090,17 +1081,38 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* We couldn't find a suitable compatibility mode, and either * the guest doesn't support "raw" mode for this CPU, or raw * mode is disabled because a maximum compat mode is set */ - return H_HARDWARE; + error_setg(errp, "Couldn't negotiate a suitable PVR during CAS"); + return 0; } /* Parsing finished */ trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); - /* Update CPUs */ - if (cpu->compat_pvr != best_compat) { - Error *local_err = NULL; + return best_compat; +} - ppc_set_compat_all(best_compat, &local_err); +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* Working address in data buffer */ + target_ulong addr = ppc64_phys_to_real(args[0]); + target_ulong ov_table; + uint32_t cas_pvr; + sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; + bool guest_radix; + Error *local_err = NULL; + + cas_pvr = cas_check_pvr(cpu, &addr, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + ppc_set_compat_all(cas_pvr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; @@ -1108,7 +1120,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, } /* For the future use: here @ov_table points to the first option vector */ - ov_table = list; + ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); ov5_guest = spapr_ovec_parse_vector(ov_table, 5); @@ -1162,7 +1174,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_cleanup(ov5_updates); if (spapr->cas_reboot) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { /* If ppc_spapr_reset() did not set up a HPT but one is necessary * (because the guest isn't going to use radix) then set it up here. */ diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 29c80bb..0341bc0 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -111,7 +111,7 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); uint64_t tce; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a7cff32..e4daf8d 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1369,7 +1369,8 @@ out: } } -static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) { /* some version guests do not wait for completion of a device * cleanup (generally done asynchronously by the kernel) before @@ -1392,7 +1393,7 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); + drck->detach(drc, DEVICE(pdev), errp); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 619f32c..128d993 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -110,7 +110,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); cpu_stop_current(); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -124,7 +124,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c index a7a5b41..6e6eee4 100644 --- a/hw/s390x/3270-ccw.c +++ b/hw/s390x/3270-ccw.c @@ -98,9 +98,13 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp) EmulatedCcw3270Class *ck = EMULATED_CCW_3270_GET_CLASS(dev); CcwDevice *cdev = CCW_DEVICE(ds); CCWDeviceClass *cdk = CCW_DEVICE_GET_CLASS(cdev); - SubchDev *sch = css_create_virtual_sch(cdev->devno, errp); + DeviceState *parent = DEVICE(cdev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(cdev->devno, true, cbus->squash_mcss, errp); if (!sch) { return; } diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 36bd4b1..a8e5575 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -14,3 +14,4 @@ obj-y += ccw-device.o obj-y += s390-pci-bus.o s390-pci-inst.o obj-y += s390-skeys.o obj-$(CONFIG_KVM) += s390-skeys-kvm.o +obj-y += s390-ccw.o diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index b54ac01..823747f 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -17,6 +17,7 @@ #include "hw/s390x/css.h" #include "ccw-device.h" #include "hw/s390x/css-bridge.h" +#include "cpu.h" /* * Invoke device-specific unplug handler, disable the subchannel @@ -103,6 +104,7 @@ VirtualCssBus *virtual_css_bus_init(void) /* Create bus on bridge device */ bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); cbus = VIRTUAL_CSS_BUS(bus); + cbus->squash_mcss = s390_get_squash_mcss(); /* Enable hotplugging */ qbus_set_hotplug_handler(bus, dev, &error_abort); diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 15c4f4b..1e2f26b 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -13,6 +13,7 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/qdev.h" +#include "qemu/error-report.h" #include "qemu/bitops.h" #include "exec/address-spaces.h" #include "cpu.h" @@ -258,7 +259,7 @@ uint16_t css_build_subchannel_id(SubchDev *sch) return css_do_build_subchannel_id(sch->cssid, sch->ssid); } -static void css_inject_io_interrupt(SubchDev *sch) +void css_inject_io_interrupt(SubchDev *sch) { uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; @@ -523,7 +524,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr, return ret; } -static void sch_handle_start_func(SubchDev *sch, ORB *orb) +static void sch_handle_start_func_virtual(SubchDev *sch, ORB *orb) { PMCW *p = &sch->curr_status.pmcw; @@ -625,13 +626,58 @@ static void sch_handle_start_func(SubchDev *sch, ORB *orb) } +static int sch_handle_start_func_passthrough(SubchDev *sch, ORB *orb) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int ret; + + if (!(s->ctrl & SCSW_ACTL_SUSP)) { + assert(orb != NULL); + p->intparm = orb->intparm; + } + + /* + * Only support prefetch enable mode. + * Only support 64bit addressing idal. + */ + if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) || + !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) { + return -EINVAL; + } + + ret = s390_ccw_cmd_request(orb, s, sch->driver_data); + switch (ret) { + /* Currently we don't update control block and just return the cc code. */ + case 0: + break; + case -EBUSY: + break; + case -ENODEV: + break; + case -EACCES: + /* Let's reflect an inaccessible host device by cc 3. */ + ret = -ENODEV; + break; + default: + /* + * All other return codes will trigger a program check, + * or set cc to 1. + */ + break; + }; + + return ret; +} + /* * On real machines, this would run asynchronously to the main vcpus. * We might want to make some parts of the ssch handling (interpreting * read/writes) asynchronous later on if we start supporting more than * our current very simple devices. */ -static void do_subchannel_work(SubchDev *sch, ORB *orb) +int do_subchannel_work_virtual(SubchDev *sch, ORB *orb) { SCSW *s = &sch->curr_status.scsw; @@ -642,12 +688,45 @@ static void do_subchannel_work(SubchDev *sch, ORB *orb) sch_handle_halt_func(sch); } else if (s->ctrl & SCSW_FCTL_START_FUNC) { /* Triggered by both ssch and rsch. */ - sch_handle_start_func(sch, orb); + sch_handle_start_func_virtual(sch, orb); } else { /* Cannot happen. */ - return; + return 0; } css_inject_io_interrupt(sch); + return 0; +} + +int do_subchannel_work_passthrough(SubchDev *sch, ORB *orb) +{ + int ret; + SCSW *s = &sch->curr_status.scsw; + + if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) { + /* TODO: Clear handling */ + sch_handle_clear_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) { + /* TODO: Halt handling */ + sch_handle_halt_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_START_FUNC) { + ret = sch_handle_start_func_passthrough(sch, orb); + } else { + /* Cannot happen. */ + return -ENODEV; + } + + return ret; +} + +static int do_subchannel_work(SubchDev *sch, ORB *orb) +{ + if (sch->do_subchannel_work) { + return sch->do_subchannel_work(sch, orb); + } else { + return -EINVAL; + } } static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) @@ -670,7 +749,7 @@ static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) dest->chars = cpu_to_be32(src->chars); } -static void copy_scsw_to_guest(SCSW *dest, const SCSW *src) +void copy_scsw_to_guest(SCSW *dest, const SCSW *src) { dest->flags = cpu_to_be16(src->flags); dest->ctrl = cpu_to_be16(src->ctrl); @@ -966,8 +1045,7 @@ int css_do_ssch(SubchDev *sch, ORB *orb) s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND); s->flags &= ~SCSW_FLAGS_MASK_PNO; - do_subchannel_work(sch, orb); - ret = 0; + ret = do_subchannel_work(sch, orb); out: return ret; @@ -1326,7 +1404,8 @@ unsigned int css_find_free_chpid(uint8_t cssid) return MAX_CHPID + 1; } -static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) +static int css_add_chpid(uint8_t cssid, uint8_t chpid, uint8_t type, + bool is_virt) { CssImage *css; @@ -1340,7 +1419,7 @@ static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) } css->chpids[chpid].in_use = 1; css->chpids[chpid].type = type; - css->chpids[chpid].is_virtual = 1; + css->chpids[chpid].is_virtual = is_virt; css_generate_chp_crws(cssid, chpid); @@ -1364,7 +1443,7 @@ void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type) p->pam = 0x80; p->chpid[0] = chpid; if (!css->chpids[chpid].in_use) { - css_add_virtual_chpid(sch->cssid, chpid, type); + css_add_chpid(sch->cssid, chpid, type, true); } memset(s, 0, sizeof(SCSW)); @@ -1946,28 +2025,59 @@ PropertyInfo css_devid_ro_propinfo = { .get = get_css_devid, }; -SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) +SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss, + Error **errp) { uint16_t schid = 0; SubchDev *sch; if (bus_id.valid) { - /* Enforce use of virtual cssid. */ - if (bus_id.cssid != VIRTUAL_CSSID) { - error_setg(errp, "cssid %hhx not valid for virtual devices", - bus_id.cssid); + if (is_virtual != (bus_id.cssid == VIRTUAL_CSSID)) { + error_setg(errp, "cssid %hhx not valid for %s devices", + bus_id.cssid, + (is_virtual ? "virtual" : "non-virtual")); return NULL; } + } + + if (bus_id.valid) { + if (squash_mcss) { + bus_id.cssid = channel_subsys.default_cssid; + } else if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + if (!css_find_free_subch_for_devno(bus_id.cssid, bus_id.ssid, bus_id.devid, &schid, errp)) { return NULL; } - } else { - bus_id.cssid = VIRTUAL_CSSID; + } else if (squash_mcss || is_virtual) { + bus_id.cssid = channel_subsys.default_cssid; + if (!css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, &bus_id.devid, &schid, errp)) { return NULL; } + } else { + for (bus_id.cssid = 0; bus_id.cssid < MAX_CSSID; ++bus_id.cssid) { + if (bus_id.cssid == VIRTUAL_CSSID) { + continue; + } + + if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + + if (css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, + &bus_id.devid, &schid, + NULL)) { + break; + } + if (bus_id.cssid == MAX_CSSID) { + error_setg(errp, "Virtual channel subsystem is full!"); + return NULL; + } + } } sch = g_malloc0(sizeof(*sch)); @@ -1978,3 +2088,147 @@ SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) css_subch_assign(sch->cssid, sch->ssid, schid, sch->devno, sch); return sch; } + +static int css_sch_get_chpids(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t chpid[8]; + int i; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/chpids", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x %x %x %x %x %x", + &chpid[0], &chpid[1], &chpid[2], &chpid[3], + &chpid[4], &chpid[5], &chpid[6], &chpid[7]) != 8) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + p->chpid[i] = chpid[i]; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_path_masks(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t pim, pam, pom; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/pimpampom", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x", &pim, &pam, &pom) != 3) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + p->pim = pim; + p->pam = pam; + p->pom = pom; + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_chpid_type(uint8_t chpid, uint32_t *type, + CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + + fid_path = g_strdup_printf("/sys/devices/css%x/chp0.%02x/type", + dev_id->cssid, chpid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x", type) != 1) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +/* + * We currently retrieve the real device information from sysfs to build the + * guest subchannel information block without considering the migration feature. + * We need to revisit this problem when we want to add migration support. + */ +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id) +{ + CssImage *css = channel_subsys.css[sch->cssid]; + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + uint32_t type; + int i, ret; + + assert(css != NULL); + memset(p, 0, sizeof(PMCW)); + p->flags |= PMCW_FLAGS_MASK_DNV; + /* We are dealing with I/O subchannels only. */ + p->devno = sch->devno; + + /* Grab path mask from sysfs. */ + ret = css_sch_get_path_masks(sch, dev_id); + if (ret) { + return ret; + } + + /* Grab chpids from sysfs. */ + ret = css_sch_get_chpids(sch, dev_id); + if (ret) { + return ret; + } + + /* Build chpid type. */ + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + if (p->chpid[i] && !css->chpids[p->chpid[i]].in_use) { + ret = css_sch_get_chpid_type(p->chpid[i], &type, dev_id); + if (ret) { + return ret; + } + css_add_chpid(sch->cssid, p->chpid[i], type, false); + } + } + + memset(s, 0, sizeof(SCSW)); + sch->curr_status.mba = 0; + for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) { + sch->curr_status.mda[i] = 0; + } + + return 0; +} diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 75d3c68..4e6469d 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -396,7 +396,7 @@ void s390_reipl_request(void) S390IPLState *ipl = get_ipl_device(); ipl->reipl_requested = true; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void s390_ipl_prepare_cpu(S390CPU *cpu) diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c new file mode 100644 index 0000000..8614dda --- /dev/null +++ b/hw/s390x/s390-ccw.c @@ -0,0 +1,153 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 + * or (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "libgen.h" +#include "hw/s390x/css.h" +#include "hw/s390x/css-bridge.h" +#include "hw/s390x/s390-ccw.h" + +int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data) +{ + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(data); + + if (cdc->handle_request) { + return cdc->handle_request(orb, scsw, data); + } else { + return -ENOSYS; + } +} + +static void s390_ccw_get_dev_info(S390CCWDevice *cdev, + char *sysfsdev, + Error **errp) +{ + unsigned int cssid, ssid, devid; + char dev_path[PATH_MAX] = {0}, *tmp; + + if (!sysfsdev) { + error_setg(errp, "No host device provided"); + error_append_hint(errp, + "Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n"); + return; + } + + if (!realpath(sysfsdev, dev_path)) { + error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev); + return; + } + + cdev->mdevid = g_strdup(basename(dev_path)); + + tmp = basename(dirname(dev_path)); + if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) { + error_setg_errno(errp, errno, "Failed to read %s", tmp); + return; + } + + cdev->hostid.cssid = cssid; + cdev->hostid.ssid = ssid; + cdev->hostid.devid = devid; + cdev->hostid.valid = true; +} + +static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; + int ret; + Error *err = NULL; + + s390_ccw_get_dev_info(cdev, sysfsdev, &err); + if (err) { + goto out_err_propagate; + } + + sch = css_create_sch(ccw_dev->devno, false, cbus->squash_mcss, &err); + if (!sch) { + goto out_mdevid_free; + } + sch->driver_data = cdev; + sch->do_subchannel_work = do_subchannel_work_passthrough; + + ccw_dev->sch = sch; + ret = css_sch_build_schib(sch, &cdev->hostid); + if (ret) { + error_setg_errno(&err, -ret, "%s: Failed to build initial schib", + __func__); + goto out_err; + } + + ck->realize(ccw_dev, &err); + if (err) { + goto out_err; + } + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, + parent->hotplugged, 1); + return; + +out_err: + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + ccw_dev->sch = NULL; + g_free(sch); +out_mdevid_free: + g_free(cdev->mdevid); +out_err_propagate: + error_propagate(errp, err); +} + +static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + SubchDev *sch = ccw_dev->sch; + + if (sch) { + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + g_free(sch); + ccw_dev->sch = NULL; + } + + g_free(cdev->mdevid); +} + +static void s390_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->bus_type = TYPE_VIRTUAL_CSS_BUS; + cdc->realize = s390_ccw_realize; + cdc->unrealize = s390_ccw_unrealize; +} + +static const TypeInfo s390_ccw_info = { + .name = TYPE_S390_CCW, + .parent = TYPE_CCW_DEVICE, + .instance_size = sizeof(S390CCWDevice), + .class_size = sizeof(S390CCWDeviceClass), + .class_init = s390_ccw_class_init, + .abstract = true, +}; + +static void register_s390_ccw_type(void) +{ + type_register_static(&s390_ccw_info); +} + +type_init(register_s390_ccw_type) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 66a6fbeb..5651483 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -357,7 +357,7 @@ out: } static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *mr, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { uint64_t pte; uint32_t flags; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 314a9cb..8bc7c98 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -624,7 +624,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) mr = &iommu->iommu_mr; while (start < end) { - entry = mr->iommu_ops->translate(mr, start, 0); + entry = mr->iommu_ops->translate(mr, start, IOMMU_NONE); if (!entry.translated_addr) { pbdev->state = ZPCI_FS_ERROR; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index fdd4384..c9021f2 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -136,10 +136,15 @@ static void ccw_init(MachineState *machine) kvm_s390_enable_css_support(s390_cpu_addr2state(0)); } /* - * Create virtual css and set it as default so that non mcss-e - * enabled guests only see virtio devices. + * Non mcss-e enabled guests only see the devices from the default + * css, which is determined by the value of the squash_mcss property. + * Note: we must not squash non virtual devices to css 0xFE. */ - ret = css_create_css_image(VIRTUAL_CSSID, true); + if (css_bus->squash_mcss) { + ret = css_create_css_image(0, true); + } else { + ret = css_create_css_image(VIRTUAL_CSSID, true); + } assert(ret == 0); /* Create VirtIO network adapters */ @@ -303,6 +308,20 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ms->loadparm[i] = ' '; /* pad right with spaces */ } } +static inline bool machine_get_squash_mcss(Object *obj, Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + return ms->s390_squash_mcss; +} + +static inline void machine_set_squash_mcss(Object *obj, bool value, + Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + ms->s390_squash_mcss = value; +} static inline void s390_machine_initfn(Object *obj) { @@ -328,6 +347,13 @@ static inline void s390_machine_initfn(Object *obj) " to upper case) to pass to machine loader, boot manager," " and guest kernel", NULL); + object_property_add_bool(obj, "s390-squash-mcss", + machine_get_squash_mcss, + machine_set_squash_mcss, NULL); + object_property_set_description(obj, "s390-squash-mcss", + "enable/disable squashing subchannels into the default css", + NULL); + object_property_set_bool(obj, false, "s390-squash-mcss", NULL); } static const TypeInfo ccw_machine_info = { diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index e7167e3..e6a6f74 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -680,9 +680,13 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev); CcwDevice *ccw_dev = CCW_DEVICE(dev); CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); - SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(ccw_dev->devno, true, cbus->squash_mcss, errp); if (!sch) { return; } @@ -697,6 +701,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->disable_cb = virtio_sch_disable_cb; sch->id.reserved = 0xff; sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->do_subchannel_work = do_subchannel_work_virtual; ccw_dev->sch = sch; dev->indicators = NULL; dev->revision = -1; diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 8f520ce..e6fc74e 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -164,7 +164,7 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size) break; case PA_POWOFF: if (value & 1) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; case PA_VERREG: diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 8e18236..d13bc30 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -207,7 +207,7 @@ static void watchdog_hit(void *opaque) qemu_irq_raise(t->nmi); } else - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); t->wd_hits++; } diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c index 474981a..4a064fb 100644 --- a/hw/timer/m48t59.c +++ b/hw/timer/m48t59.c @@ -1,7 +1,7 @@ /* * QEMU M48T59 and M48T08 NVRAM emulation for PPC PREP and Sparc platforms * - * Copyright (c) 2003-2005, 2007 Jocelyn Mayer + * Copyright (c) 2003-2005, 2007, 2017 Jocelyn Mayer * Copyright (c) 2013 Hervé Poussineau * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -159,7 +159,7 @@ static void watchdog_cb (void *opaque) NVRAM->buffer[0x1FF7] = 0x00; NVRAM->buffer[0x1FFC] &= ~0x40; /* May it be a hw CPU Reset instead ? */ - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { qemu_set_irq(NVRAM->IRQ, 1); qemu_set_irq(NVRAM->IRQ, 0); diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c index 4488590..93bc6e17 100644 --- a/hw/timer/milkymist-sysctl.c +++ b/hw/timer/milkymist-sysctl.c @@ -90,7 +90,7 @@ static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value) trace_milkymist_sysctl_icap_write(value); switch (value & 0xffff) { case 0x000e: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; } } @@ -195,7 +195,7 @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value, s->regs[addr] = 1; break; case R_SYSTEM_ID: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case R_GPIO_IN: diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c index 59002b4..68ba5a7 100644 --- a/hw/timer/pxa2xx_timer.c +++ b/hw/timer/pxa2xx_timer.c @@ -401,7 +401,7 @@ static void pxa2xx_timer_tick(void *opaque) if (t->num == 3) if (i->reset3 & 1) { i->reset3 = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 5958be8..97f1c456 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -8,6 +8,7 @@ common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o hcd-ehci-pci.o common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o +common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o obj-$(CONFIG_TUSB6010) += tusb6010.o diff --git a/hw/usb/core.c b/hw/usb/core.c index 45fa00c..241ae66 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -98,6 +98,14 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) USBDevice *dev = ep->dev; USBBus *bus = usb_bus_from_device(dev); + if (!qdev_hotplug) { + /* + * This is machine init cold plug. No need to wakeup anyone, + * all devices will be reset anyway. And trying to wakeup can + * cause problems due to hitting uninitialized devices. + */ + return; + } if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { dev->port->ops->wakeup(dev->port); } diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index 47b7519..e82a6a6 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -402,7 +402,20 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p, port->wPortChange &= ~PORT_STAT_C_ENABLE; break; case PORT_SUSPEND: - port->wPortStatus &= ~PORT_STAT_SUSPEND; + if (port->wPortStatus & PORT_STAT_SUSPEND) { + port->wPortStatus &= ~PORT_STAT_SUSPEND; + + /* + * USB Spec rev2.0 11.24.2.7.2.3 C_PORT_SUSPEND + * "This bit is set on the following transitions: + * - On transition from the Resuming state to the + * SendEOP [sic] state" + * + * Note that this includes both remote wake-up and + * explicit ClearPortFeature(PORT_SUSPEND). + */ + port->wPortChange |= PORT_STAT_C_SUSPEND; + } break; case PORT_C_SUSPEND: port->wPortChange &= ~PORT_STAT_C_SUSPEND; diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index 83a4f0e..76ceca1 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -516,27 +516,16 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) char label[32]; static int index; - while (*filename && *filename != ':') { - const char *p; - - if (strstart(filename, "vendorid=", &p)) { - error_report("vendorid is not supported anymore"); - return NULL; - } else if (strstart(filename, "productid=", &p)) { - error_report("productid is not supported anymore"); - return NULL; - } else { - error_report("unrecognized serial USB option %s", filename); - return NULL; - } - while(*filename == ',') - filename++; + if (*filename == ':') { + filename++; + } else if (*filename) { + error_report("unrecognized serial USB option %s", filename); + return NULL; } if (!*filename) { error_report("character device specification needed"); return NULL; } - filename++; snprintf(label, sizeof(label), "usbserial%d", index++); cdrv = qemu_chr_new(label, filename); diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 50ef817..17c572c 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2232,13 +2232,13 @@ static void ehci_update_frindex(EHCIState *ehci, int uframes) ehci->frindex = (ehci->frindex + uframes) % 0x4000; } -static void ehci_frame_timer(void *opaque) +static void ehci_work_bh(void *opaque) { EHCIState *ehci = opaque; int need_timer = 0; int64_t expire_time, t_now; uint64_t ns_elapsed; - int uframes, skipped_uframes; + uint64_t uframes, skipped_uframes; int i; t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -2324,6 +2324,13 @@ static void ehci_frame_timer(void *opaque) } } +static void ehci_work_timer(void *opaque) +{ + EHCIState *ehci = opaque; + + qemu_bh_schedule(ehci->async_bh); +} + static const MemoryRegionOps ehci_mmio_caps_ops = { .read = ehci_caps_read, .write = ehci_caps_write, @@ -2478,8 +2485,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) s->ports[i].dev = 0; } - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_frame_timer, s); - s->async_bh = qemu_bh_new(ehci_frame_timer, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); + s->async_bh = qemu_bh_new(ehci_work_bh, s); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-xhci-nec.c b/hw/usb/hcd-xhci-nec.c new file mode 100644 index 0000000..75715a0 --- /dev/null +++ b/hw/usb/hcd-xhci-nec.c @@ -0,0 +1,63 @@ +/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/usb.h" +#include "hw/pci/pci.h" + +#include "hcd-xhci.h" + +static Property nec_xhci_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BIT("superspeed-ports-first", + XHCIState, flags, XHCI_FLAG_SS_FIRST, true), + DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, + XHCI_FLAG_FORCE_PCIE_ENDCAP, false), + DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), + DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nec_xhci_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = nec_xhci_properties; + k->vendor_id = PCI_VENDOR_ID_NEC; + k->device_id = PCI_DEVICE_ID_NEC_UPD720200; + k->revision = 0x03; +} + +static const TypeInfo nec_xhci_info = { + .name = TYPE_NEC_XHCI, + .parent = TYPE_XHCI, + .class_init = nec_xhci_class_init, +}; + +static void nec_xhci_register_types(void) +{ + type_register_static(&nec_xhci_info); +} + +type_init(nec_xhci_register_types) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 77d8e11..a0c7960 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -29,6 +29,8 @@ #include "trace.h" #include "qapi/error.h" +#include "hcd-xhci.h" + //#define DEBUG_XHCI //#define DEBUG_DATA @@ -40,16 +42,6 @@ #define FIXME(_msg) do { fprintf(stderr, "FIXME %s:%d %s\n", \ __func__, __LINE__, _msg); abort(); } while (0) -#define MAXPORTS_2 15 -#define MAXPORTS_3 15 - -#define MAXPORTS (MAXPORTS_2+MAXPORTS_3) -#define MAXSLOTS 64 -#define MAXINTRS 16 - -/* Very pessimistic, let's hope it's enough for all cases */ -#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) - #define TRB_LINK_LIMIT 32 #define COMMAND_LIMIT 256 #define TRANSFER_LIMIT 256 @@ -164,84 +156,8 @@ enum { PLS_RESUME = 15, }; -typedef enum TRBType { - TRB_RESERVED = 0, - TR_NORMAL, - TR_SETUP, - TR_DATA, - TR_STATUS, - TR_ISOCH, - TR_LINK, - TR_EVDATA, - TR_NOOP, - CR_ENABLE_SLOT, - CR_DISABLE_SLOT, - CR_ADDRESS_DEVICE, - CR_CONFIGURE_ENDPOINT, - CR_EVALUATE_CONTEXT, - CR_RESET_ENDPOINT, - CR_STOP_ENDPOINT, - CR_SET_TR_DEQUEUE, - CR_RESET_DEVICE, - CR_FORCE_EVENT, - CR_NEGOTIATE_BW, - CR_SET_LATENCY_TOLERANCE, - CR_GET_PORT_BANDWIDTH, - CR_FORCE_HEADER, - CR_NOOP, - ER_TRANSFER = 32, - ER_COMMAND_COMPLETE, - ER_PORT_STATUS_CHANGE, - ER_BANDWIDTH_REQUEST, - ER_DOORBELL, - ER_HOST_CONTROLLER, - ER_DEVICE_NOTIFICATION, - ER_MFINDEX_WRAP, - /* vendor specific bits */ - CR_VENDOR_NEC_FIRMWARE_REVISION = 49, - CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, -} TRBType; - #define CR_LINK TR_LINK -typedef enum TRBCCode { - CC_INVALID = 0, - CC_SUCCESS, - CC_DATA_BUFFER_ERROR, - CC_BABBLE_DETECTED, - CC_USB_TRANSACTION_ERROR, - CC_TRB_ERROR, - CC_STALL_ERROR, - CC_RESOURCE_ERROR, - CC_BANDWIDTH_ERROR, - CC_NO_SLOTS_ERROR, - CC_INVALID_STREAM_TYPE_ERROR, - CC_SLOT_NOT_ENABLED_ERROR, - CC_EP_NOT_ENABLED_ERROR, - CC_SHORT_PACKET, - CC_RING_UNDERRUN, - CC_RING_OVERRUN, - CC_VF_ER_FULL, - CC_PARAMETER_ERROR, - CC_BANDWIDTH_OVERRUN, - CC_CONTEXT_STATE_ERROR, - CC_NO_PING_RESPONSE_ERROR, - CC_EVENT_RING_FULL_ERROR, - CC_INCOMPATIBLE_DEVICE_ERROR, - CC_MISSED_SERVICE_ERROR, - CC_COMMAND_RING_STOPPED, - CC_COMMAND_ABORTED, - CC_STOPPED, - CC_STOPPED_LENGTH_INVALID, - CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, - CC_ISOCH_BUFFER_OVERRUN = 31, - CC_EVENT_LOST_ERROR, - CC_UNDEFINED_ERROR, - CC_INVALID_STREAM_ID_ERROR, - CC_SECONDARY_BANDWIDTH_ERROR, - CC_SPLIT_TRANSACTION_ERROR -} TRBCCode; - #define TRB_C (1<<0) #define TRB_TYPE_SHIFT 10 #define TRB_TYPE_MASK 0x3f @@ -301,10 +217,6 @@ typedef enum TRBCCode { #define SLOT_CONTEXT_ENTRIES_MASK 0x1f #define SLOT_CONTEXT_ENTRIES_SHIFT 27 -typedef struct XHCIState XHCIState; -typedef struct XHCIStreamContext XHCIStreamContext; -typedef struct XHCIEPContext XHCIEPContext; - #define get_field(data, field) \ (((data) >> field##_SHIFT) & field##_MASK) @@ -326,21 +238,6 @@ typedef enum EPType { ET_INTR_IN, } EPType; -typedef struct XHCIRing { - dma_addr_t dequeue; - bool ccs; -} XHCIRing; - -typedef struct XHCIPort { - XHCIState *xhci; - uint32_t portsc; - uint32_t portnr; - USBPort *uport; - uint32_t speedmask; - char name[16]; - MemoryRegion mem; -} XHCIPort; - typedef struct XHCITransfer { XHCIEPContext *epctx; USBPacket packet; @@ -402,101 +299,6 @@ struct XHCIEPContext { QEMUTimer *kick_timer; }; -typedef struct XHCISlot { - bool enabled; - bool addressed; - dma_addr_t ctx; - USBPort *uport; - XHCIEPContext * eps[31]; -} XHCISlot; - -typedef struct XHCIEvent { - TRBType type; - TRBCCode ccode; - uint64_t ptr; - uint32_t length; - uint32_t flags; - uint8_t slotid; - uint8_t epid; -} XHCIEvent; - -typedef struct XHCIInterrupter { - uint32_t iman; - uint32_t imod; - uint32_t erstsz; - uint32_t erstba_low; - uint32_t erstba_high; - uint32_t erdp_low; - uint32_t erdp_high; - - bool msix_used, er_pcs; - - dma_addr_t er_start; - uint32_t er_size; - unsigned int er_ep_idx; - - /* kept for live migration compat only */ - bool er_full_unused; - XHCIEvent ev_buffer[EV_QUEUE]; - unsigned int ev_buffer_put; - unsigned int ev_buffer_get; - -} XHCIInterrupter; - -struct XHCIState { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - USBBus bus; - MemoryRegion mem; - MemoryRegion mem_cap; - MemoryRegion mem_oper; - MemoryRegion mem_runtime; - MemoryRegion mem_doorbell; - - /* properties */ - uint32_t numports_2; - uint32_t numports_3; - uint32_t numintrs; - uint32_t numslots; - uint32_t flags; - uint32_t max_pstreams_mask; - OnOffAuto msi; - OnOffAuto msix; - - /* Operational Registers */ - uint32_t usbcmd; - uint32_t usbsts; - uint32_t dnctrl; - uint32_t crcr_low; - uint32_t crcr_high; - uint32_t dcbaap_low; - uint32_t dcbaap_high; - uint32_t config; - - USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)]; - XHCIPort ports[MAXPORTS]; - XHCISlot slots[MAXSLOTS]; - uint32_t numports; - - /* Runtime Registers */ - int64_t mfindex_start; - QEMUTimer *mfwrap_timer; - XHCIInterrupter intr[MAXINTRS]; - - XHCIRing cmd_ring; - - bool nec_quirks; -}; - -#define TYPE_XHCI "base-xhci" -#define TYPE_NEC_XHCI "nec-usb-xhci" -#define TYPE_QEMU_XHCI "qemu-xhci" - -#define XHCI(obj) \ - OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) - typedef struct XHCIEvRingSeg { uint32_t addr_low; uint32_t addr_high; @@ -504,12 +306,6 @@ typedef struct XHCIEvRingSeg { uint32_t rsvd; } XHCIEvRingSeg; -enum xhci_flags { - XHCI_FLAG_SS_FIRST = 1, - XHCI_FLAG_FORCE_PCIE_ENDCAP, - XHCI_FLAG_ENABLE_STREAMS, -}; - static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid); static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid); @@ -3843,18 +3639,6 @@ static const VMStateDescription vmstate_xhci = { } }; -static Property nec_xhci_properties[] = { - DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), - DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BIT("superspeed-ports-first", - XHCIState, flags, XHCI_FLAG_SS_FIRST, true), - DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, - XHCI_FLAG_FORCE_PCIE_ENDCAP, false), - DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), - DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), - DEFINE_PROP_END_OF_LIST(), -}; - static Property xhci_properties[] = { DEFINE_PROP_BIT("streams", XHCIState, flags, XHCI_FLAG_ENABLE_STREAMS, true), @@ -3886,23 +3670,6 @@ static const TypeInfo xhci_info = { .abstract = true, }; -static void nec_xhci_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->props = nec_xhci_properties; - k->vendor_id = PCI_VENDOR_ID_NEC; - k->device_id = PCI_DEVICE_ID_NEC_UPD720200; - k->revision = 0x03; -} - -static const TypeInfo nec_xhci_info = { - .name = TYPE_NEC_XHCI, - .parent = TYPE_XHCI, - .class_init = nec_xhci_class_init, -}; - static void qemu_xhci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -3933,7 +3700,6 @@ static const TypeInfo qemu_xhci_info = { static void xhci_register_types(void) { type_register_static(&xhci_info); - type_register_static(&nec_xhci_info); type_register_static(&qemu_xhci_info); } diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h new file mode 100644 index 0000000..fc36a4c --- /dev/null +++ b/hw/usb/hcd-xhci.h @@ -0,0 +1,226 @@ +/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#define TYPE_XHCI "base-xhci" +#define TYPE_NEC_XHCI "nec-usb-xhci" +#define TYPE_QEMU_XHCI "qemu-xhci" + +#define XHCI(obj) \ + OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) + +#define MAXPORTS_2 15 +#define MAXPORTS_3 15 + +#define MAXPORTS (MAXPORTS_2 + MAXPORTS_3) +#define MAXSLOTS 64 +#define MAXINTRS 16 + +/* Very pessimistic, let's hope it's enough for all cases */ +#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) + +typedef struct XHCIState XHCIState; +typedef struct XHCIStreamContext XHCIStreamContext; +typedef struct XHCIEPContext XHCIEPContext; + +enum xhci_flags { + XHCI_FLAG_SS_FIRST = 1, + XHCI_FLAG_FORCE_PCIE_ENDCAP, + XHCI_FLAG_ENABLE_STREAMS, +}; + +typedef enum TRBType { + TRB_RESERVED = 0, + TR_NORMAL, + TR_SETUP, + TR_DATA, + TR_STATUS, + TR_ISOCH, + TR_LINK, + TR_EVDATA, + TR_NOOP, + CR_ENABLE_SLOT, + CR_DISABLE_SLOT, + CR_ADDRESS_DEVICE, + CR_CONFIGURE_ENDPOINT, + CR_EVALUATE_CONTEXT, + CR_RESET_ENDPOINT, + CR_STOP_ENDPOINT, + CR_SET_TR_DEQUEUE, + CR_RESET_DEVICE, + CR_FORCE_EVENT, + CR_NEGOTIATE_BW, + CR_SET_LATENCY_TOLERANCE, + CR_GET_PORT_BANDWIDTH, + CR_FORCE_HEADER, + CR_NOOP, + ER_TRANSFER = 32, + ER_COMMAND_COMPLETE, + ER_PORT_STATUS_CHANGE, + ER_BANDWIDTH_REQUEST, + ER_DOORBELL, + ER_HOST_CONTROLLER, + ER_DEVICE_NOTIFICATION, + ER_MFINDEX_WRAP, + /* vendor specific bits */ + CR_VENDOR_NEC_FIRMWARE_REVISION = 49, + CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, +} TRBType; + +typedef enum TRBCCode { + CC_INVALID = 0, + CC_SUCCESS, + CC_DATA_BUFFER_ERROR, + CC_BABBLE_DETECTED, + CC_USB_TRANSACTION_ERROR, + CC_TRB_ERROR, + CC_STALL_ERROR, + CC_RESOURCE_ERROR, + CC_BANDWIDTH_ERROR, + CC_NO_SLOTS_ERROR, + CC_INVALID_STREAM_TYPE_ERROR, + CC_SLOT_NOT_ENABLED_ERROR, + CC_EP_NOT_ENABLED_ERROR, + CC_SHORT_PACKET, + CC_RING_UNDERRUN, + CC_RING_OVERRUN, + CC_VF_ER_FULL, + CC_PARAMETER_ERROR, + CC_BANDWIDTH_OVERRUN, + CC_CONTEXT_STATE_ERROR, + CC_NO_PING_RESPONSE_ERROR, + CC_EVENT_RING_FULL_ERROR, + CC_INCOMPATIBLE_DEVICE_ERROR, + CC_MISSED_SERVICE_ERROR, + CC_COMMAND_RING_STOPPED, + CC_COMMAND_ABORTED, + CC_STOPPED, + CC_STOPPED_LENGTH_INVALID, + CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, + CC_ISOCH_BUFFER_OVERRUN = 31, + CC_EVENT_LOST_ERROR, + CC_UNDEFINED_ERROR, + CC_INVALID_STREAM_ID_ERROR, + CC_SECONDARY_BANDWIDTH_ERROR, + CC_SPLIT_TRANSACTION_ERROR +} TRBCCode; + +typedef struct XHCIRing { + dma_addr_t dequeue; + bool ccs; +} XHCIRing; + +typedef struct XHCIPort { + XHCIState *xhci; + uint32_t portsc; + uint32_t portnr; + USBPort *uport; + uint32_t speedmask; + char name[16]; + MemoryRegion mem; +} XHCIPort; + +typedef struct XHCISlot { + bool enabled; + bool addressed; + dma_addr_t ctx; + USBPort *uport; + XHCIEPContext *eps[31]; +} XHCISlot; + +typedef struct XHCIEvent { + TRBType type; + TRBCCode ccode; + uint64_t ptr; + uint32_t length; + uint32_t flags; + uint8_t slotid; + uint8_t epid; +} XHCIEvent; + +typedef struct XHCIInterrupter { + uint32_t iman; + uint32_t imod; + uint32_t erstsz; + uint32_t erstba_low; + uint32_t erstba_high; + uint32_t erdp_low; + uint32_t erdp_high; + + bool msix_used, er_pcs; + + dma_addr_t er_start; + uint32_t er_size; + unsigned int er_ep_idx; + + /* kept for live migration compat only */ + bool er_full_unused; + XHCIEvent ev_buffer[EV_QUEUE]; + unsigned int ev_buffer_put; + unsigned int ev_buffer_get; + +} XHCIInterrupter; + +struct XHCIState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + USBBus bus; + MemoryRegion mem; + MemoryRegion mem_cap; + MemoryRegion mem_oper; + MemoryRegion mem_runtime; + MemoryRegion mem_doorbell; + + /* properties */ + uint32_t numports_2; + uint32_t numports_3; + uint32_t numintrs; + uint32_t numslots; + uint32_t flags; + uint32_t max_pstreams_mask; + OnOffAuto msi; + OnOffAuto msix; + + /* Operational Registers */ + uint32_t usbcmd; + uint32_t usbsts; + uint32_t dnctrl; + uint32_t crcr_low; + uint32_t crcr_high; + uint32_t dcbaap_low; + uint32_t dcbaap_high; + uint32_t config; + + USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)]; + XHCIPort ports[MAXPORTS]; + XHCISlot slots[MAXSLOTS]; + uint32_t numports; + + /* Runtime Registers */ + int64_t mfindex_start; + QEMUTimer *mfwrap_timer; + XHCIInterrupter intr[MAXINTRS]; + + XHCIRing cmd_ring; + + bool nec_quirks; +}; diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index 05e7fbb..c3ab909 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -1,6 +1,7 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o obj-$(CONFIG_PCI) += pci.o pci-quirks.o +obj-$(CONFIG_VFIO_CCW) += ccw.o obj-$(CONFIG_SOFTMMU) += platform.o obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c new file mode 100644 index 0000000..12d0262 --- /dev/null +++ b/hw/vfio/ccw.c @@ -0,0 +1,434 @@ +/* + * vfio based subchannel assignment support + * + * Copyright 2017 IBM Corp. + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or(at + * your option) any version. See the COPYING file in the top-level + * directory. + */ + +#include <linux/vfio.h> +#include <linux/vfio_ccw.h> +#include <sys/ioctl.h> + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" +#include "hw/s390x/s390-ccw.h" +#include "hw/s390x/ccw-device.h" +#include "qemu/error-report.h" + +#define TYPE_VFIO_CCW "vfio-ccw" +typedef struct VFIOCCWDevice { + S390CCWDevice cdev; + VFIODevice vdev; + uint64_t io_region_size; + uint64_t io_region_offset; + struct ccw_io_region *io_region; + EventNotifier io_notifier; +} VFIOCCWDevice; + +static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) +{ + vdev->needs_reset = false; +} + +/* + * We don't need vfio_hot_reset_multi and vfio_eoi operations for + * vfio_ccw device now. + */ +struct VFIODeviceOps vfio_ccw_ops = { + .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, +}; + +static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data) +{ + S390CCWDevice *cdev = data; + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + struct ccw_io_region *region = vcdev->io_region; + int ret; + + QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); + QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); + QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); + + memset(region, 0, sizeof(*region)); + + memcpy(region->orb_area, orb, sizeof(ORB)); + memcpy(region->scsw_area, scsw, sizeof(SCSW)); + +again: + ret = pwrite(vcdev->vdev.fd, region, + vcdev->io_region_size, vcdev->io_region_offset); + if (ret != vcdev->io_region_size) { + if (errno == EAGAIN) { + goto again; + } + error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); + return -errno; + } + + return region->ret_code; +} + +static void vfio_ccw_reset(DeviceState *dev) +{ + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + + ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); +} + +static void vfio_ccw_io_notifier_handler(void *opaque) +{ + VFIOCCWDevice *vcdev = opaque; + struct ccw_io_region *region = vcdev->io_region; + S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + SubchDev *sch = ccw_dev->sch; + SCSW *s = &sch->curr_status.scsw; + PMCW *p = &sch->curr_status.pmcw; + IRB irb; + int size; + + if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { + return; + } + + size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, + vcdev->io_region_offset); + if (size == -1) { + switch (errno) { + case ENODEV: + /* Generate a deferred cc 3 condition. */ + s->flags |= SCSW_FLAGS_MASK_CC; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); + goto read_err; + case EFAULT: + /* Memory problem, generate channel data check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_DATA_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + default: + /* Error, generate channel program check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_PROG_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + } + } else if (size != vcdev->io_region_size) { + /* Information transfer error, generate channel-control check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_CHN_CTRL_CHK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + } + + memcpy(&irb, region->irb_area, sizeof(IRB)); + + /* Update control block via irb. */ + copy_scsw_to_guest(s, &irb.scsw); + + /* If a uint check is pending, copy sense data. */ + if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && + (p->chars & PMCW_CHARS_MASK_CSENSE)) { + memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); + } + +read_err: + css_inject_io_interrupt(sch); +} + +static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) +{ + VFIODevice *vdev = &vcdev->vdev; + struct vfio_irq_info *irq_info; + struct vfio_irq_set *irq_set; + size_t argsz; + int32_t *pfd; + + if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { + error_setg(errp, "vfio: unexpected number of io irqs %u", + vdev->num_irqs); + return; + } + + argsz = sizeof(*irq_set); + irq_info = g_malloc0(argsz); + irq_info->index = VFIO_CCW_IO_IRQ_INDEX; + irq_info->argsz = argsz; + if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, + irq_info) < 0 || irq_info->count < 1) { + error_setg_errno(errp, errno, "vfio: Error getting irq info"); + goto out_free_info; + } + + if (event_notifier_init(&vcdev->io_notifier, 0)) { + error_setg_errno(errp, errno, + "vfio: Unable to init event notifier for IO"); + goto out_free_info; + } + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_CCW_IO_IRQ_INDEX; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *) &irq_set->data; + + *pfd = event_notifier_get_fd(&vcdev->io_notifier); + qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev); + if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + error_setg(errp, "vfio: Failed to set up io notification"); + qemu_set_fd_handler(*pfd, NULL, NULL, vcdev); + event_notifier_cleanup(&vcdev->io_notifier); + } + + g_free(irq_set); + +out_free_info: + g_free(irq_info); +} + +static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) +{ + struct vfio_irq_set *irq_set; + size_t argsz; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_CCW_IO_IRQ_INDEX; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *) &irq_set->data; + *pfd = -1; + + if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + error_report("vfio: Failed to de-assign device io fd: %m"); + } + + qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), + NULL, NULL, vcdev); + event_notifier_cleanup(&vcdev->io_notifier); + + g_free(irq_set); +} + +static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) +{ + VFIODevice *vdev = &vcdev->vdev; + struct vfio_region_info *info; + int ret; + + /* Sanity check device */ + if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { + error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); + return; + } + + if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { + error_setg(errp, "vfio: Unexpected number of the I/O region %u", + vdev->num_regions); + return; + } + + ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); + if (ret) { + error_setg_errno(errp, -ret, "vfio: Error getting config info"); + return; + } + + vcdev->io_region_size = info->size; + if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { + error_setg(errp, "vfio: Unexpected size of the I/O region"); + g_free(info); + return; + } + + vcdev->io_region_offset = info->offset; + vcdev->io_region = g_malloc0(info->size); + + g_free(info); +} + +static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) +{ + g_free(vcdev->io_region); +} + +static void vfio_put_device(VFIOCCWDevice *vcdev) +{ + g_free(vcdev->vdev.name); + vfio_put_base_device(&vcdev->vdev); +} + +static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) +{ + char *tmp, group_path[PATH_MAX]; + ssize_t len; + int groupid; + + tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", + cdev->hostid.cssid, cdev->hostid.ssid, + cdev->hostid.devid, cdev->mdevid); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len <= 0 || len >= sizeof(group_path)) { + error_setg(errp, "vfio: no iommu_group found"); + return NULL; + } + + group_path[len] = 0; + + if (sscanf(basename(group_path), "%d", &groupid) != 1) { + error_setg(errp, "vfio: failed to read %s", group_path); + return NULL; + } + + return vfio_get_group(groupid, &address_space_memory, errp); +} + +static void vfio_ccw_realize(DeviceState *dev, Error **errp) +{ + VFIODevice *vbasedev; + VFIOGroup *group; + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + Error *err = NULL; + + /* Call the class init function for subchannel. */ + if (cdc->realize) { + cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); + if (err) { + goto out_err_propagate; + } + } + + group = vfio_ccw_get_group(cdev, &err); + if (!group) { + goto out_group_err; + } + + vcdev->vdev.ops = &vfio_ccw_ops; + vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; + vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid, + cdev->hostid.ssid, cdev->hostid.devid); + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) { + error_setg(&err, "vfio: subchannel %s has already been attached", + vcdev->vdev.name); + goto out_device_err; + } + } + + if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) { + goto out_device_err; + } + + vfio_ccw_get_region(vcdev, &err); + if (err) { + goto out_region_err; + } + + vfio_ccw_register_io_notifier(vcdev, &err); + if (err) { + goto out_notifier_err; + } + + return; + +out_notifier_err: + vfio_ccw_put_region(vcdev); +out_region_err: + vfio_put_device(vcdev); +out_device_err: + vfio_put_group(group); +out_group_err: + if (cdc->unrealize) { + cdc->unrealize(cdev, NULL); + } +out_err_propagate: + error_propagate(errp, err); +} + +static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) +{ + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIOGroup *group = vcdev->vdev.group; + + vfio_ccw_unregister_io_notifier(vcdev); + vfio_ccw_put_region(vcdev); + vfio_put_device(vcdev); + vfio_put_group(group); + + if (cdc->unrealize) { + cdc->unrealize(cdev, errp); + } +} + +static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vfio_ccw_vmstate = { + .name = TYPE_VFIO_CCW, + .unmigratable = 1, +}; + +static void vfio_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->props = vfio_ccw_properties; + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + dc->realize = vfio_ccw_realize; + dc->unrealize = vfio_ccw_unrealize; + dc->reset = vfio_ccw_reset; + + cdc->handle_request = vfio_ccw_handle_request; +} + +static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), + .class_init = vfio_ccw_class_init, +}; + +static void register_vfio_ccw_type(void) +{ + type_register_static(&vfio_ccw_info); +} + +type_init(register_vfio_ccw_type) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a8f12ee..b9abe77 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -502,7 +502,7 @@ static void vfio_listener_region_add(MemoryListener *listener, QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); - memory_region_iommu_replay(giommu->iommu, &giommu->n, false); + memory_region_iommu_replay(giommu->iommu, &giommu->n); return; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b87a176..dde094a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -162,11 +162,11 @@ fail: } static int process_message_reply(struct vhost_dev *dev, - VhostUserMsg msg) + const VhostUserMsg *msg) { VhostUserMsg msg_reply; - if ((msg.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } @@ -174,10 +174,10 @@ static int process_message_reply(struct vhost_dev *dev, return -1; } - if (msg_reply.request != msg.request) { + if (msg_reply.request != msg->request) { error_report("Received unexpected msg type." "Expected %d received %d", - msg.request, msg_reply.request); + msg->request, msg_reply.request); return -1; } @@ -324,7 +324,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, } if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; @@ -716,7 +716,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) /* If reply_ack supported, slave has to ack specified MTU is valid */ if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 2aeaf1f..0c5c9cd 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -110,7 +110,7 @@ void watchdog_perform_action(void) switch (watchdog_action) { case WDT_RESET: /* same as 'system_reset' in monitor */ qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case WDT_SHUTDOWN: /* same as 'system_powerdown' in monitor */ diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c index 457a897..c89ced2 100644 --- a/hw/xenpv/xen_domainbuild.c +++ b/hw/xenpv/xen_domainbuild.c @@ -148,7 +148,7 @@ static void xen_domain_poll(void *opaque) return; quit: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static int xen_domain_watcher(void) diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index 11176e2..4636f8e9 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -100,7 +100,7 @@ static void lx60_fpga_write(void *opaque, hwaddr addr, case 0x10: /*board reset*/ if (val == 0xdead) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 9e906f7..09c7c69 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -235,14 +235,6 @@ void block_job_complete(BlockJob *job, Error **errp); BlockJobInfo *block_job_query(BlockJob *job, Error **errp); /** - * block_job_pause: - * @job: The job to be paused. - * - * Asynchronously pause the specified job. - */ -void block_job_pause(BlockJob *job); - -/** * block_job_user_pause: * @job: The job to be paused. * @@ -260,14 +252,6 @@ void block_job_user_pause(BlockJob *job); bool block_job_user_paused(BlockJob *job); /** - * block_job_resume: - * @job: The job to be resumed. - * - * Resume the specified job. Must be paired with a preceding block_job_pause. - */ -void block_job_resume(BlockJob *job); - -/** * block_job_user_resume: * @job: The job to be resumed. * diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 3f86cc5..f13ad05 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -44,9 +44,6 @@ struct BlockJobDriver { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); - /** Optional callback for job types that need to forward I/O status reset */ - void (*iostatus_reset)(BlockJob *job); - /** Mandatory: Entrypoint for the Coroutine. */ CoroutineEntry *start; @@ -159,21 +156,26 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_ref: - * @bs: The block device. + * block_job_pause_all: + * + * Asynchronously pause all jobs. + */ +void block_job_pause_all(void); + +/** + * block_job_resume_all: * - * Grab a reference to the block job. Should be paired with block_job_unref. + * Resume all block jobs. Must be paired with a preceding block_job_pause_all. */ -void block_job_ref(BlockJob *job); +void block_job_resume_all(void); /** - * block_job_unref: + * block_job_early_fail: * @bs: The block device. * - * Release reference to the block job and release resources if it is the last - * reference. + * The block job could not be started, free it. */ -void block_job_unref(BlockJob *job); +void block_job_early_fail(BlockJob *job); /** * block_job_completed: @@ -239,7 +241,8 @@ typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); * @fn: The function to run in the main loop * @opaque: The opaque value that is passed to @fn * - * Execute a given function in the main loop with the BlockDriverState + * This function must be called by the main job coroutine just before it + * returns. @fn is executed in the main loop with the BlockDriverState * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and * anything that uses bdrv_drain_all() in the main loop. * diff --git a/include/exec/memory.h b/include/exec/memory.h index 99e0f54..bfdc685 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -185,8 +185,14 @@ struct MemoryRegionOps { typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; struct MemoryRegionIOMMUOps { - /* Return a TLB entry that contains a given address. */ - IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); + /* + * Return a TLB entry that contains a given address. Flag should + * be the access permission of this translation operation. We can + * set flag to IOMMU_NONE to mean that we don't need any + * read/write permission checks, like, when for region replay. + */ + IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, + IOMMUAccessFlags flag); /* Returns minimum supported page size */ uint64_t (*get_min_page_size)(MemoryRegion *iommu); /* Called when IOMMU Notifier flag changed */ @@ -725,11 +731,8 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, * * @mr: the memory region to observe * @n: the notifier to which to replay iommu mappings - * @is_write: Whether to treat the replay as a translate "write" - * through the iommu */ -void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, - bool is_write); +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n); /** * memory_region_iommu_replay_all: replay existing IOMMU translations diff --git a/include/hw/compat.h b/include/hw/compat.h index 55b1765..400c64b 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -6,6 +6,14 @@ .driver = "pci-bridge",\ .property = "shpc",\ .value = "off",\ + },{\ + .driver = "intel-iommu",\ + .property = "pt",\ + .value = "off",\ + },{\ + .driver = "virtio-net-device",\ + .property = "x-mtu-bypass-backend",\ + .value = "off",\ }, #define HW_COMPAT_2_8 \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 361c07c..ef89c0c 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -74,6 +74,7 @@ struct X86IOMMUState { SysBusDevice busdev; bool intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ + bool pt_supported; /* Whether vIOMMU supports pass-through */ IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 1c2e970..38470b2 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -123,6 +123,9 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); +/* PCI release callback. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev); + /* VFIO EEH hooks */ #ifdef CONFIG_LINUX bool spapr_phb_eeh_available(sPAPRPHBState *sphb); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5802f88..98fb78b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -32,6 +32,7 @@ struct sPAPRRTCState { int64_t ns_offset; }; +typedef struct sPAPRDIMMState sPAPRDIMMState; typedef struct sPAPRMachineClass sPAPRMachineClass; #define TYPE_SPAPR_MACHINE "spapr-machine" @@ -104,6 +105,11 @@ struct sPAPRMachineState { /* RTAS state */ QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; + /* Pending DIMM unplug cache. It is populated when a LMB + * unplug starts. It can be regenerated if a migration + * occurs during the unplug process. */ + QTAILQ_HEAD(, sPAPRDIMMState) pending_dimm_unplugs; + /*< public >*/ char *kvm_type; MemoryHotplugState hotplug_memory; @@ -598,7 +604,6 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); struct sPAPREventLogEntry { int log_type; - bool exception; void *data; QTAILQ_ENTRY(sPAPREventLogEntry) next; }; @@ -610,6 +615,7 @@ int spapr_h_cas_compose_response(sPAPRMachineState *sm, sPAPROptionVector *ov5_updates); void close_htab_fd(sPAPRMachineState *spapr); void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr); +void spapr_free_hpt(sPAPRMachineState *spapr); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(sPAPRTCETable *tcet, uint32_t page_shift, uint64_t bus_offset, @@ -636,6 +642,10 @@ void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr); +/* CPU and LMB DRC release callbacks. */ +void spapr_core_release(DeviceState *dev); +void spapr_lmb_release(DeviceState *dev); + /* rtas-configure-connector state */ struct sPAPRConfigureConnectorState { uint32_t drc_index; diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 5524247..813b9ff 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -130,8 +130,6 @@ typedef enum { SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003, } sPAPRDRCCResponse; -typedef void (spapr_drc_detach_cb)(DeviceState *d, void *opaque); - typedef struct sPAPRDRConnector { /*< private >*/ DeviceState parent; @@ -158,8 +156,6 @@ typedef struct sPAPRDRConnector { /* device pointer, via link property */ DeviceState *dev; - spapr_drc_detach_cb *detach_cb; - void *detach_cb_opaque; } sPAPRDRConnector; typedef struct sPAPRDRConnectorClass { @@ -188,9 +184,7 @@ typedef struct sPAPRDRConnectorClass { /* QEMU interfaces for managing hotplug operations */ void (*attach)(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int fdt_start_offset, bool coldplug, Error **errp); - void (*detach)(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp); + void (*detach)(sPAPRDRConnector *drc, DeviceState *d, Error **errp); bool (*release_pending)(sPAPRDRConnector *drc); void (*set_signalled)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 05e6acb..a3073f9 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -81,7 +81,6 @@ struct ICPState { uint8_t pending_priority; uint8_t mfrr; qemu_irq output; - bool cap_irq_xics_enabled; XICSFabric *xics; }; @@ -206,6 +205,6 @@ void icp_resend(ICPState *ss); typedef struct sPAPRMachineState sPAPRMachineState; int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp); +void xics_spapr_init(sPAPRMachineState *spapr); #endif /* XICS_H */ diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h index 5a0203b..cf08604 100644 --- a/include/hw/s390x/css-bridge.h +++ b/include/hw/s390x/css-bridge.h @@ -28,6 +28,7 @@ typedef struct VirtualCssBridge { /* virtual css bus type */ typedef struct VirtualCssBus { BusState parent_obj; + bool squash_mcss; } VirtualCssBus; #define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus" diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index e61fa74..596a2f2 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -91,10 +91,29 @@ struct SubchDev { /* transport-provided data: */ int (*ccw_cb) (SubchDev *, CCW1); void (*disable_cb)(SubchDev *); + int (*do_subchannel_work) (SubchDev *, ORB *); SenseId id; void *driver_data; }; +/* + * Identify a device within the channel subsystem. + * Note that this can be used to identify either the subchannel or + * the attached I/O device, as there's always one I/O device per + * subchannel. + */ +typedef struct CssDevId { + uint8_t cssid; + uint8_t ssid; + uint16_t devid; + bool valid; +} CssDevId; + +extern PropertyInfo css_devid_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) + typedef struct IndAddr { hwaddr addr; uint64_t map; @@ -116,8 +135,11 @@ bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno); void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno, SubchDev *sch); void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type); +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); unsigned int css_find_free_chpid(uint8_t cssid); uint16_t css_build_subchannel_id(SubchDev *sch); +void copy_scsw_to_guest(SCSW *dest, const SCSW *src); +void css_inject_io_interrupt(SubchDev *sch); void css_reset(void); void css_reset_sch(SubchDev *sch); void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid); @@ -127,6 +149,9 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); void css_generate_css_crws(uint8_t cssid); void css_clear_sei_pending(void); void css_adapter_interrupt(uint8_t isc); +int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data); +int do_subchannel_work_virtual(SubchDev *sub, ORB *orb); +int do_subchannel_work_passthrough(SubchDev *sub, ORB *orb); typedef enum { CSS_IO_ADAPTER_VIRTIO = 0, @@ -164,23 +189,6 @@ int css_do_rsch(SubchDev *sch); int css_do_rchp(uint8_t cssid, uint8_t chpid); bool css_present(uint8_t cssid); #endif -/* - * Identify a device within the channel subsystem. - * Note that this can be used to identify either the subchannel or - * the attached I/O device, as there's always one I/O device per - * subchannel. - */ -typedef struct CssDevId { - uint8_t cssid; - uint8_t ssid; - uint16_t devid; - bool valid; -} CssDevId; - -extern PropertyInfo css_devid_propinfo; - -#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ - DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) extern PropertyInfo css_devid_ro_propinfo; @@ -190,16 +198,25 @@ extern PropertyInfo css_devid_ro_propinfo; /** * Create a subchannel for the given bus id. * - * If @p bus_id is valid, verify that it uses the virtual channel - * subsystem id and is not already in use, and find a free subchannel - * id for it. If @p bus_id is not valid, find a free subchannel id and - * device number across all subchannel sets. If either of the former - * actions succeed, allocate a subchannel structure, initialise it - * with the bus id, subchannel id and device number, register it with - * the CSS and return it. Otherwise return NULL. + * If @p bus_id is valid, and @p squash_mcss is true, verify that it is + * not already in use in the default css, and find a free devno from the + * default css image for it. + * If @p bus_id is valid, and @p squash_mcss is false, verify that it is + * not already in use, and find a free devno for it. + * If @p bus_id is not valid, and if either @p squash_mcss or @p is_virtual + * is true, find a free subchannel id and device number across all + * subchannel sets from the default css image. + * If @p bus_id is not valid, and if both @p squash_mcss and @p is_virtual + * are false, find a non-full css image and find a free subchannel id and + * device number across all subchannel sets from it. + * + * If either of the former actions succeed, allocate a subchannel structure, + * initialise it with the bus id, subchannel id and device number, register + * it with the CSS and return it. Otherwise return NULL. * * The caller becomes owner of the returned subchannel structure and * is responsible for unregistering and freeing it. */ -SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp); +SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss, + Error **errp); #endif diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h new file mode 100644 index 0000000..9f45cf1 --- /dev/null +++ b/include/hw/s390x/s390-ccw.h @@ -0,0 +1,39 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp. + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_CCW_H +#define HW_S390_CCW_H + +#include "hw/s390x/ccw-device.h" + +#define TYPE_S390_CCW "s390-ccw" +#define S390_CCW_DEVICE(obj) \ + OBJECT_CHECK(S390CCWDevice, (obj), TYPE_S390_CCW) +#define S390_CCW_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(S390CCWDeviceClass, (klass), TYPE_S390_CCW) +#define S390_CCW_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(S390CCWDeviceClass, (obj), TYPE_S390_CCW) + +typedef struct S390CCWDevice { + CcwDevice parent_obj; + CssDevId hostid; + char *mdevid; +} S390CCWDevice; + +typedef struct S390CCWDeviceClass { + CCWDeviceClass parent_class; + void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp); + void (*unrealize)(S390CCWDevice *dev, Error **errp); + int (*handle_request) (ORB *, SCSW *, void *); +} S390CCWDeviceClass; + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 7b8a3e4..3027555f 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -29,6 +29,7 @@ typedef struct S390CcwMachineState { bool aes_key_wrap; bool dea_key_wrap; uint8_t loadparm[8]; + bool s390_squash_mcss; } S390CcwMachineState; typedef struct S390CcwMachineClass { diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index c582de1..9521013 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -45,6 +45,7 @@ enum { VFIO_DEVICE_TYPE_PCI = 0, VFIO_DEVICE_TYPE_PLATFORM = 1, + VFIO_DEVICE_TYPE_CCW = 2, }; typedef struct VFIOMmap { diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 1eec9a2..602b486 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -97,6 +97,7 @@ typedef struct VirtIONet { QEMUTimer *announce_timer; int announce_counter; bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; } VirtIONet; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7b6edba..80c45c3 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -79,6 +79,7 @@ struct VirtIODevice uint16_t queue_sel; uint64_t guest_features; uint64_t host_features; + uint64_t backend_features; size_t config_len; void *config; uint16_t config_vector; diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index eca9a2c..d0c6e0a 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -124,7 +124,7 @@ * Recommend using hypercall for address space switches rather * than MOV to CR3 instruction */ -#define HV_X64_MWAIT_RECOMMENDED (1 << 0) +#define HV_X64_AS_SWITCH_RECOMMENDED (1 << 0) /* Recommend using hypercall for local TLB flushes rather * than INVLPG or MOV to CR3 instructions */ #define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) @@ -148,6 +148,11 @@ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) /* + * Virtual APIC support + */ +#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) + +/* * Crash notification flag. */ #define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index c8b3338..29d463a 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -641,6 +641,7 @@ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ #define KEY_DATA 0x277 +#define KEY_ONSCREEN_KEYBOARD 0x278 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index b472b85..666e201 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -58,9 +58,14 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * Resolution for main axes (ABS_X, ABS_Y, ABS_Z) is reported in - * units per millimeter (units/mm), resolution for rotational axes - * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) + * is reported in units per millimeter (units/mm), resolution + * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in + * in units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). */ struct input_absinfo { int32_t value; diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 634c9c4..d56bb00 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -114,7 +114,7 @@ #define PCI_SUBSYSTEM_ID 0x2e #define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ #define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) #define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ @@ -630,6 +630,7 @@ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index f1c0712..fa14d0e 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -13,6 +13,7 @@ */ #include "qapi-types.h" +#include "sysemu.h" /* replay clock kinds */ enum ReplayClockKind { @@ -98,7 +99,7 @@ int64_t replay_read_clock(ReplayClockKind kind); /* Events */ /*! Called when qemu shutdown is requested. */ -void replay_shutdown_request(void); +void replay_shutdown_request(ShutdownCause cause); /*! Should be called at check points in the execution. These check points are skipped, if they were not met. Saves checkpoint in the SAVE mode and validates in the PLAY mode. diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index ed8fe3b..69046eb 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -33,8 +33,26 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); void vm_state_notify(int running, RunState state); -#define VMRESET_SILENT false -#define VMRESET_REPORT true +/* Enumeration of various causes for shutdown. */ +typedef enum ShutdownCause { + SHUTDOWN_CAUSE_NONE, /* No shutdown request pending */ + SHUTDOWN_CAUSE_HOST_ERROR, /* An error prevents further use of guest */ + SHUTDOWN_CAUSE_HOST_QMP, /* Reaction to a QMP command, like 'quit' */ + SHUTDOWN_CAUSE_HOST_SIGNAL, /* Reaction to a signal, such as SIGINT */ + SHUTDOWN_CAUSE_HOST_UI, /* Reaction to UI event, like window close */ + SHUTDOWN_CAUSE_GUEST_SHUTDOWN,/* Guest shutdown/suspend request, via + ACPI or other hardware-specific means */ + SHUTDOWN_CAUSE_GUEST_RESET, /* Guest reset request, and command line + turns that into a shutdown */ + SHUTDOWN_CAUSE_GUEST_PANIC, /* Guest panicked, and command line turns + that into a shutdown */ + SHUTDOWN_CAUSE__MAX, +} ShutdownCause; + +static inline bool shutdown_caused_by_guest(ShutdownCause cause) +{ + return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; +} void vm_start(void); int vm_prepare_start(void); @@ -49,23 +67,23 @@ typedef enum WakeupReason { QEMU_WAKEUP_REASON_OTHER, } WakeupReason; -void qemu_system_reset_request(void); +void qemu_system_reset_request(ShutdownCause reason); void qemu_system_suspend_request(void); void qemu_register_suspend_notifier(Notifier *notifier); void qemu_system_wakeup_request(WakeupReason reason); void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); void qemu_register_wakeup_notifier(Notifier *notifier); -void qemu_system_shutdown_request(void); +void qemu_system_shutdown_request(ShutdownCause reason); void qemu_system_powerdown_request(void); void qemu_register_powerdown_notifier(Notifier *notifier); void qemu_system_debug_request(void); void qemu_system_vmstop_request(RunState reason); void qemu_system_vmstop_request_prepare(void); bool qemu_vmstop_requested(RunState *r); -int qemu_shutdown_requested_get(void); -int qemu_reset_requested_get(void); +ShutdownCause qemu_shutdown_requested_get(void); +ShutdownCause qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); -void qemu_system_reset(bool report); +void qemu_system_reset(ShutdownCause reason); void qemu_system_guest_panicked(GuestPanicInformation *info); void qemu_add_exit_notifier(Notifier *notify); @@ -2052,7 +2052,7 @@ int kvm_cpu_exec(CPUState *cpu) break; case KVM_EXIT_SHUTDOWN: DPRINTF("shutdown\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); ret = EXCP_INTERRUPT; break; case KVM_EXIT_UNKNOWN: @@ -2066,11 +2066,11 @@ int kvm_cpu_exec(CPUState *cpu) case KVM_EXIT_SYSTEM_EVENT: switch (run->system_event.type) { case KVM_SYSTEM_EVENT_SHUTDOWN: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); ret = EXCP_INTERRUPT; break; case KVM_SYSTEM_EVENT_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); ret = EXCP_INTERRUPT; break; case KVM_SYSTEM_EVENT_CRASH: diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 1101d55..7258a00 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -192,13 +196,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h index 13a74af..8d5ceae 100644 --- a/linux-headers/asm-arm/unistd-common.h +++ b/linux-headers/asm-arm/unistd-common.h @@ -353,5 +353,6 @@ #define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) #define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) #define __NR_pkey_free (__NR_SYSCALL_BASE + 396) +#define __NR_statx (__NR_SYSCALL_BASE + 397) #endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 651ec30..31bb1dd 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -212,13 +216,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 4edbe4b..07fbeb9 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. */ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 598043c..a1786340 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index ac63ca6..243f195 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kmo[16]; /* with MSA4 */ __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ - __u8 reserved[1824]; + __u8 kma[16]; /* with MSA8 */ + __u8 reserved[1808]; }; /* kvm attributes for crypto */ @@ -197,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/linux-headers/asm-s390/unistd.h b/linux-headers/asm-s390/unistd.h index 8a404fd..65e7e59 100644 --- a/linux-headers/asm-s390/unistd.h +++ b/linux-headers/asm-s390/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 739c0c5..c2824d0 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index d45ea28..8a206df 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -380,5 +380,7 @@ #define __NR_pkey_mprotect 380 #define __NR_pkey_alloc 381 #define __NR_pkey_free 382 +#define __NR_statx 383 +#define __NR_arch_prctl 384 #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index e22db91..336c2e4 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -333,5 +333,6 @@ #define __NR_pkey_mprotect 329 #define __NR_pkey_alloc 330 #define __NR_pkey_free 331 +#define __NR_statx 332 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 84e58b2..cb98a52 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -286,6 +286,7 @@ #define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329) #define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) #define __NR_pkey_free (__X32_SYSCALL_BIT + 331) +#define __NR_statx (__X32_SYSCALL_BIT + 332) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4e082a8..d2892da 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 +/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ +#define KVM_VM_MIPS_TE 0 +#define KVM_VM_MIPS_VZ 1 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* @@ -883,6 +887,14 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 +#define KVM_CAP_S390_GS 140 +#define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_ARM_USER_IRQ 144 #ifdef KVM_CAP_IRQ_ROUTING @@ -1087,6 +1099,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { KVM_DEV_TYPE_FSL_MPIC_20 = 1, @@ -1108,6 +1121,11 @@ enum kvm_device_type { KVM_DEV_TYPE_MAX, }; +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + /* * ioctls for VM fds */ @@ -1354,4 +1372,11 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 2ed5dc3..9701772 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -20,7 +20,8 @@ #define UFFD_API ((__u64)0xAA) #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ - UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ @@ -92,7 +93,7 @@ struct uffd_msg { struct { __u64 start; __u64 end; - } madv_dn; + } remove; struct { /* unused reserved fields */ @@ -109,7 +110,8 @@ struct uffd_msg { #define UFFD_EVENT_PAGEFAULT 0x12 #define UFFD_EVENT_FORK 0x13 #define UFFD_EVENT_REMAP 0x14 -#define UFFD_EVENT_MADVDONTNEED 0x15 +#define UFFD_EVENT_REMOVE 0x15 +#define UFFD_EVENT_UNMAP 0x16 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -155,9 +157,10 @@ struct uffdio_api { #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) #define UFFD_FEATURE_EVENT_REMAP (1<<2) -#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_EVENT_REMOVE (1<<3) #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) +#define UFFD_FEATURE_EVENT_UNMAP (1<<6) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 531cb2e..4e7ab4c 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -198,6 +198,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; @@ -212,6 +213,7 @@ struct vfio_device_info { #define VFIO_DEVICE_API_PCI_STRING "vfio-pci" #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, @@ -446,6 +448,22 @@ enum { VFIO_PCI_NUM_IRQS }; +/* + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h new file mode 100644 index 0000000..3a56551 --- /dev/null +++ b/linux-headers/linux/vfio_ccw.h @@ -0,0 +1,24 @@ +/* + * Interfaces for vfio-ccw + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + */ + +#ifndef _VFIO_CCW_H_ +#define _VFIO_CCW_H_ + +#include <linux/types.h> + +struct ccw_io_region { +#define ORB_AREA_SIZE 12 + __u8 orb_area[ORB_AREA_SIZE]; +#define SCSW_AREA_SIZE 12 + __u8 scsw_area[SCSW_AREA_SIZE]; +#define IRB_AREA_SIZE 96 + __u8 irb_area[IRB_AREA_SIZE]; + __u32 ret_code; +} __attribute__((packed)); + +#endif @@ -1620,8 +1620,7 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr) return TARGET_PAGE_SIZE; } -void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, - bool is_write) +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) { hwaddr addr, granularity; IOMMUTLBEntry iotlb; @@ -1635,7 +1634,7 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, granularity = memory_region_iommu_get_min_page_size(mr); for (addr = 0; addr < memory_region_size(mr); addr += granularity) { - iotlb = mr->iommu_ops->translate(mr, addr, is_write); + iotlb = mr->iommu_ops->translate(mr, addr, IOMMU_NONE); if (iotlb.perm != IOMMU_NONE) { n->notify(n, &iotlb); } @@ -1653,7 +1652,7 @@ void memory_region_iommu_replay_all(MemoryRegion *mr) IOMMUNotifier *notifier; IOMMU_NOTIFIER_FOREACH(notifier, mr) { - memory_region_iommu_replay(mr, notifier, false); + memory_region_iommu_replay(mr, notifier); } } diff --git a/migration/colo.c b/migration/colo.c index 929b31c..8c13a3c 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -626,7 +626,7 @@ void *colo_process_incoming_thread(void *opaque) } qemu_mutex_lock_iothread(); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); vmstate_loading = true; if (qemu_loadvm_state(fb) < 0) { error_report("COLO: loadvm failed"); diff --git a/migration/savevm.c b/migration/savevm.c index d971e5e..a4532b6 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2281,7 +2281,7 @@ int load_vmstate(const char *name, Error **errp) return -EINVAL; } - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); mis->from_src_file = f; aio_context_acquire(aio_context); @@ -52,7 +52,7 @@ int setenv(const char *name, const char *value, int overwrite) static BOOL WINAPI qemu_ctrl_handler(DWORD type) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_SIGNAL); /* Windows 7 kills application when the function returns. Sleep here to give QEMU a try for closing. Sleep period is 10000ms because Windows kills the program diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex 0b01d49..5ad0564 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h index 07d8cbc..2089274 100644 --- a/pc-bios/s390-ccw/s390-ccw.h +++ b/pc-bios/s390-ccw/s390-ccw.h @@ -42,6 +42,13 @@ typedef unsigned long long __u64; #ifndef NULL #define NULL 0 #endif +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef MIN_NON_ZERO +#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \ + ((b) == 0 ? (a) : (MIN(a, b)))) +#endif #include "cio.h" #include "iplb.h" diff --git a/pc-bios/s390-ccw/scsi.h b/pc-bios/s390-ccw/scsi.h index fc830f7..fe3fd5a 100644 --- a/pc-bios/s390-ccw/scsi.h +++ b/pc-bios/s390-ccw/scsi.h @@ -26,6 +26,15 @@ #define SCSI_SENSE_KEY_NO_SENSE 0 #define SCSI_SENSE_KEY_UNIT_ATTENTION 6 +/* SCSI Inquiry Types */ +#define SCSI_INQUIRY_STANDARD 0x00U +#define SCSI_INQUIRY_EVPD 0x01U + +/* SCSI Inquiry Pages */ +#define SCSI_INQUIRY_STANDARD_NONE 0x00U +#define SCSI_INQUIRY_EVPD_SUPPORTED_PAGES 0x00U +#define SCSI_INQUIRY_EVPD_BLOCK_LIMITS 0xb0U + union ScsiLun { uint64_t v64; /* numeric shortcut */ uint8_t v8[8]; /* generic 8 bytes representation */ @@ -71,6 +80,27 @@ struct ScsiInquiryStd { } __attribute__((packed)); typedef struct ScsiInquiryStd ScsiInquiryStd; +struct ScsiInquiryEvpdPages { + uint8_t peripheral_qdt; /* b0, use (b0 & 0x1f) to get SCSI_INQ_RDT */ + uint8_t page_code; /* b1 */ + uint16_t page_length; /* b2..b3 length = N-3 */ + uint8_t byte[28]; /* b4..bN Supported EVPD pages (N=31 here) */ +} __attribute__((packed)); +typedef struct ScsiInquiryEvpdPages ScsiInquiryEvpdPages; + +struct ScsiInquiryEvpdBl { + uint8_t peripheral_qdt; /* b0, use (b0 & 0x1f) to get SCSI_INQ_RDT */ + uint8_t page_code; + uint16_t page_length; + uint8_t b4; + uint8_t b5; + uint16_t b6; + uint32_t max_transfer; /* b8 */ + uint32_t b12[7]; /* b12..b43 (defined fields) */ + uint32_t b44[5]; /* b44..b63 (reserved fields) */ +} __attribute__((packed)); +typedef struct ScsiInquiryEvpdBl ScsiInquiryEvpdBl; + struct ScsiCdbInquiry { uint8_t command; /* b0, == 0x12 */ uint8_t b1; /* b1, |= 0x01 (evpd) */ diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c index d850a8d..f61ecf0 100644 --- a/pc-bios/s390-ccw/virtio-scsi.c +++ b/pc-bios/s390-ccw/virtio-scsi.c @@ -19,6 +19,8 @@ static VirtioScsiCmdReq req; static VirtioScsiCmdResp resp; static uint8_t scsi_inquiry_std_response[256]; +static ScsiInquiryEvpdPages scsi_inquiry_evpd_pages_response; +static ScsiInquiryEvpdBl scsi_inquiry_evpd_bl_response; static inline void vs_assert(bool term, const char **msgs) { @@ -89,10 +91,13 @@ static void vs_run(const char *title, VirtioCmd *cmd, VDev *vdev, /* SCSI protocol implementation routines */ -static bool scsi_inquiry(VDev *vdev, void *data, uint32_t data_size) +static bool scsi_inquiry(VDev *vdev, uint8_t evpd, uint8_t page, + void *data, uint32_t data_size) { ScsiCdbInquiry cdb = { .command = 0x12, + .b1 = evpd, + .b2 = page, .alloc_len = data_size < 65535 ? data_size : 65535, }; VirtioCmd inquiry[] = { @@ -142,19 +147,18 @@ static bool scsi_report_luns(VDev *vdev, void *data, uint32_t data_size) } static bool scsi_read_10(VDev *vdev, - ulong sector, int sectors, void *data) + ulong sector, int sectors, void *data, + unsigned int data_size) { - int f = vdev->blk_factor; - unsigned int data_size = sectors * virtio_get_block_size() * f; ScsiCdbRead10 cdb = { .command = 0x28, - .lba = sector * f, - .xfer_length = sectors * f, + .lba = sector, + .xfer_length = sectors, }; VirtioCmd read_10[] = { { &req, sizeof(req), VRING_DESC_F_NEXT }, { &resp, sizeof(resp), VRING_DESC_F_WRITE | VRING_DESC_F_NEXT }, - { data, data_size * f, VRING_DESC_F_WRITE }, + { data, data_size, VRING_DESC_F_WRITE }, }; debug_print_int("read_10 sector", sector); @@ -203,6 +207,7 @@ static void virtio_scsi_locate_device(VDev *vdev) debug_print_int("config.scsi.max_channel", vdev->config.scsi.max_channel); debug_print_int("config.scsi.max_target ", vdev->config.scsi.max_target); debug_print_int("config.scsi.max_lun ", vdev->config.scsi.max_lun); + debug_print_int("config.scsi.max_sectors", vdev->config.scsi.max_sectors); if (vdev->scsi_device_selected) { sdev->channel = vdev->selected_scsi_device.channel; @@ -255,9 +260,23 @@ static void virtio_scsi_locate_device(VDev *vdev) int virtio_scsi_read_many(VDev *vdev, ulong sector, void *load_addr, int sec_num) { - if (!scsi_read_10(vdev, sector, sec_num, load_addr)) { - virtio_scsi_verify_response(&resp, "virtio-scsi:read_many"); - } + int sector_count; + int f = vdev->blk_factor; + unsigned int data_size; + unsigned int max_transfer = MIN_NON_ZERO(vdev->config.scsi.max_sectors, + vdev->max_transfer); + + do { + sector_count = MIN_NON_ZERO(sec_num, max_transfer); + data_size = sector_count * virtio_get_block_size() * f; + if (!scsi_read_10(vdev, sector * f, sector_count * f, load_addr, + data_size)) { + virtio_scsi_verify_response(&resp, "virtio-scsi:read_many"); + } + load_addr += data_size; + sector += sector_count; + sec_num -= sector_count; + } while (sec_num > 0); return 0; } @@ -304,6 +323,9 @@ void virtio_scsi_setup(VDev *vdev) int retry_test_unit_ready = 3; uint8_t data[256]; uint32_t data_size = sizeof(data); + ScsiInquiryEvpdPages *evpd = &scsi_inquiry_evpd_pages_response; + ScsiInquiryEvpdBl *evpd_bl = &scsi_inquiry_evpd_bl_response; + int i; vdev->scsi_device = &default_scsi_device; virtio_scsi_locate_device(vdev); @@ -334,7 +356,10 @@ void virtio_scsi_setup(VDev *vdev) } /* read and cache SCSI INQUIRY response */ - if (!scsi_inquiry(vdev, scsi_inquiry_std_response, + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_STANDARD, + SCSI_INQUIRY_STANDARD_NONE, + scsi_inquiry_std_response, sizeof(scsi_inquiry_std_response))) { virtio_scsi_verify_response(&resp, "virtio-scsi:setup:inquiry"); } @@ -345,6 +370,44 @@ void virtio_scsi_setup(VDev *vdev) vdev->scsi_block_size = VIRTIO_ISO_BLOCK_SIZE; } + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_EVPD, + SCSI_INQUIRY_EVPD_SUPPORTED_PAGES, + evpd, + sizeof(*evpd))) { + virtio_scsi_verify_response(&resp, "virtio-scsi:setup:supported_pages"); + } + + debug_print_int("EVPD length", evpd->page_length); + + for (i = 0; i <= evpd->page_length; i++) { + debug_print_int("supported EVPD page", evpd->byte[i]); + + if (evpd->byte[i] != SCSI_INQUIRY_EVPD_BLOCK_LIMITS) { + continue; + } + + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_EVPD, + SCSI_INQUIRY_EVPD_BLOCK_LIMITS, + evpd_bl, + sizeof(*evpd_bl))) { + virtio_scsi_verify_response(&resp, "virtio-scsi:setup:blocklimits"); + } + + debug_print_int("max transfer", evpd_bl->max_transfer); + vdev->max_transfer = evpd_bl->max_transfer; + } + + /* + * The host sg driver will often be unhappy with particularly large + * I/Os that exceed the block iovec limits. Let's enforce something + * reasonable, despite what the device configuration tells us. + */ + + vdev->max_transfer = MIN_NON_ZERO(VIRTIO_SCSI_MAX_SECTORS, + vdev->max_transfer); + if (!scsi_read_capacity(vdev, data, data_size)) { virtio_scsi_verify_response(&resp, "virtio-scsi:setup:read_capacity"); } diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h index f50b38b..4c4f4bb 100644 --- a/pc-bios/s390-ccw/virtio-scsi.h +++ b/pc-bios/s390-ccw/virtio-scsi.h @@ -19,6 +19,8 @@ #define VIRTIO_SCSI_CDB_SIZE SCSI_DEFAULT_CDB_SIZE #define VIRTIO_SCSI_SENSE_SIZE SCSI_DEFAULT_SENSE_SIZE +#define VIRTIO_SCSI_MAX_SECTORS 2048 + /* command-specific response values */ #define VIRTIO_SCSI_S_OK 0x00 #define VIRTIO_SCSI_S_BAD_TARGET 0x03 diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index 3388a42..1eaf865 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -277,6 +277,7 @@ struct VDev { bool scsi_device_selected; ScsiDevice selected_scsi_device; uint64_t netboot_start_addr; + uint32_t max_transfer; }; typedef struct VDev VDev; diff --git a/qapi-schema.json b/qapi-schema.json index e38c5f0..4b50b65 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3057,8 +3057,6 @@ # @name: the type name found in the search # # Since: 1.1 -# -# Notes: This command is experimental and may change syntax in future releases. ## { 'struct': 'ObjectTypeInfo', 'data': { 'name': 'str' } } diff --git a/qapi/event.json b/qapi/event.json index e80f3f4..6d22b02 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -10,6 +10,10 @@ # Emitted when the virtual machine has shut down, indicating that qemu is # about to exit. # +# @guest: If true, the shutdown was triggered by a guest request (such as +# a guest-initiated ACPI shutdown request or other hardware-specific action) +# rather than a host request (such as sending qemu a SIGINT). (since 2.10) +# # Note: If the command-line option "-no-shutdown" has been specified, qemu will # not exit, and a STOP event will eventually follow the SHUTDOWN event # @@ -17,11 +21,11 @@ # # Example: # -# <- { "event": "SHUTDOWN", +# <- { "event": "SHUTDOWN", "data": { "guest": true }, # "timestamp": { "seconds": 1267040730, "microseconds": 682951 } } # ## -{ 'event': 'SHUTDOWN' } +{ 'event': 'SHUTDOWN', 'data': { 'guest': 'bool' } } ## # @POWERDOWN: @@ -44,15 +48,20 @@ # # Emitted when the virtual machine is reset # +# @guest: If true, the reset was triggered by a guest request (such as +# a guest-initiated ACPI reboot request or other hardware-specific action) +# rather than a host request (such as the QMP command system_reset). +# (since 2.10) +# # Since: 0.12.0 # # Example: # -# <- { "event": "RESET", +# <- { "event": "RESET", "data": { "guest": false }, # "timestamp": { "seconds": 1267041653, "microseconds": 9518 } } # ## -{ 'event': 'RESET' } +{ 'event': 'RESET', 'data': { 'guest': 'bool' } } ## # @STOP: @@ -298,7 +298,7 @@ static BlockBackend *img_open_opts(const char *optstr, QDECREF(options); return NULL; } - qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk = blk_new_open(NULL, NULL, options, flags, &local_err); if (!blk) { @@ -331,7 +331,7 @@ static BlockBackend *img_open_file(const char *filename, } if (force_share) { - qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk = blk_new_open(filename, NULL, options, flags, &local_err); if (!blk) { @@ -3219,8 +3219,7 @@ static int img_rebase(int argc, char **argv) if (!options) { options = qdict_new(); } - qdict_put(options, BDRV_OPT_FORCE_SHARE, - qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); blk_old_backing = blk_new_open(backing_name, NULL, @@ -76,7 +76,7 @@ static int openfile(char *name, int flags, bool writethrough, bool force_share, QDECREF(opts); return 1; } - qdict_put(opts, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(opts, BDRV_OPT_FORCE_SHARE, true); } qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err); if (!qemuio_blk) { diff --git a/qemu-options.hx b/qemu-options.hx index f63f7dc..a6c9b9e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -42,7 +42,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" " suppress-vmdesc=on|off disables self-describing migration (default=off)\n" " nvdimm=on|off controls NVDIMM support (default=off)\n" - " enforce-config-section=on|off enforce configuration section migration (default=off)\n", + " enforce-config-section=on|off enforce configuration section migration (default=off)\n" + " s390-squash-mcss=on|off controls support for squashing into default css (default=off)\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @@ -81,6 +82,9 @@ controls whether DEA wrapping keys will be created to allow execution of DEA cryptographic functions. The default is on. @item nvdimm=on|off Enables or disables NVDIMM support. The default is off. +@item s390-squash-mcss=on|off +Enables or disables squashing subchannels into the default css. +The default is off. @end table ETEXI @@ -980,12 +984,12 @@ STEXI ETEXI DEF("usb", 0, QEMU_OPTION_usb, - "-usb enable the USB driver (will be the default soon)\n", + "-usb enable the USB driver (if it is not used by default yet)\n", QEMU_ARCH_ALL) STEXI @item -usb @findex -usb -Enable the USB driver (will be the default soon) +Enable the USB driver (if it is not used by default yet). ETEXI DEF("usbdevice", HAS_ARG, QEMU_OPTION_usbdevice, @@ -995,7 +999,8 @@ STEXI @item -usbdevice @var{devname} @findex -usbdevice -Add the USB device @var{devname}. @xref{usb_devices}. +Add the USB device @var{devname}. Note that this option is deprecated, +please use @code{-device usb-...} instead. @xref{usb_devices}. @table @option @@ -1373,7 +1378,7 @@ output such as guest graphics, guest console, and the QEMU monitor in a window. With this option, you can have QEMU listen on VNC display @var{display} and redirect the VGA display over the VNC session. It is very useful to enable the usb tablet device when using this option -(option @option{-usbdevice tablet}). When using the VNC display, you +(option @option{-device usb-tablet}). When using the VNC display, you must use the @option{-k} parameter to set the keyboard layout if you are not using en-us. Valid syntax for the @var{display} is @@ -84,7 +84,7 @@ UuidInfo *qmp_query_uuid(Error **errp) void qmp_quit(Error **errp) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_QMP); } void qmp_stop(Error **errp) @@ -105,7 +105,7 @@ void qmp_stop(Error **errp) void qmp_system_reset(Error **errp) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_HOST_QMP); } void qmp_system_powerdown(Error **erp) diff --git a/replay/replay-internal.h b/replay/replay-internal.h index ed66ed8..3ebb199 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -22,8 +22,9 @@ enum ReplayEvents { EVENT_EXCEPTION, /* for async events */ EVENT_ASYNC, - /* for shutdown request */ + /* for shutdown requests, range allows recovery of ShutdownCause */ EVENT_SHUTDOWN, + EVENT_SHUTDOWN_LAST = EVENT_SHUTDOWN + SHUTDOWN_CAUSE__MAX, /* for character device write event */ EVENT_CHAR_WRITE, /* for character device read all event */ diff --git a/replay/replay.c b/replay/replay.c index f810628..ff58a5a 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -49,9 +49,10 @@ bool replay_next_event_is(int event) res = true; } switch (replay_state.data_kind) { - case EVENT_SHUTDOWN: + case EVENT_SHUTDOWN ... EVENT_SHUTDOWN_LAST: replay_finish_event(); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(replay_state.data_kind - + EVENT_SHUTDOWN); break; default: /* clock, time_t, checkpoint and other events */ @@ -170,11 +171,11 @@ bool replay_has_interrupt(void) return res; } -void replay_shutdown_request(void) +void replay_shutdown_request(ShutdownCause cause) { if (replay_mode == REPLAY_MODE_RECORD) { replay_mutex_lock(); - replay_put_event(EVENT_SHUTDOWN); + replay_put_event(EVENT_SHUTDOWN + cause); replay_mutex_unlock(); } } diff --git a/scripts/qmp/qom-set b/scripts/qmp/qom-set index 54ecfec..94e2778 100755 --- a/scripts/qmp/qom-set +++ b/scripts/qmp/qom-set @@ -61,4 +61,4 @@ else: srv = QEMUMonitorProtocol(socket_path) srv.connect() -print srv.command('qom-set', path=path, property=prop, value=sys.argv[2]) +print srv.command('qom-set', path=path, property=prop, value=value) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 6a370a8..2f906c4 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -113,7 +113,7 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" -for header in kvm.h kvm_para.h vfio.h vhost.h \ +for header in kvm.h kvm_para.h vfio.h vfio_ccw.h vhost.h \ psci.h userfaultfd.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done diff --git a/slirp/socket.c b/slirp/socket.c index 8692772..3b49a69 100644 --- a/slirp/socket.c +++ b/slirp/socket.c @@ -100,6 +100,9 @@ sofree(struct socket *so) if(so->so_next && so->so_prev) remque(so); /* crashes if so is not in a queue */ + if (so->so_tcpcb) { + free(so->so_tcpcb); + } free(so); } diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c index edb98f0..07bcbdb 100644 --- a/slirp/tcp_input.c +++ b/slirp/tcp_input.c @@ -1587,11 +1587,11 @@ tcp_mss(struct tcpcb *tp, u_int offer) switch (so->so_ffamily) { case AF_INET: mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - + sizeof(struct ip); + - sizeof(struct ip); break; case AF_INET6: mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - + sizeof(struct ip6); + - sizeof(struct ip6); break; default: g_assert_not_reached(); diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index ed16e18..dc8b4bb 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -204,7 +204,7 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ip = mtod(m, struct ip *); - ip->ip_len = tlen; + ip->ip_len = m->m_len; ip->ip_dst = tcpiph_save.ti_dst; ip->ip_src = tcpiph_save.ti_src; ip->ip_p = tcpiph_save.ti_pr; @@ -224,7 +224,7 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip6); ip6 = mtod(m, struct ip6 *); - ip6->ip_pl = tlen; + ip6->ip_pl = tcpiph_save.ti_len; ip6->ip_dst = tcpiph_save.ti_dst6; ip6->ip_src = tcpiph_save.ti_src6; ip6->ip_nh = tcpiph_save.ti_nh6; diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c index 652195d..ac22323 100644 --- a/target/alpha/sys_helper.c +++ b/target/alpha/sys_helper.c @@ -60,9 +60,9 @@ void helper_tb_flush(CPUAlphaState *env) void helper_halt(uint64_t restart) { if (restart) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/target/arm/psci.c b/target/arm/psci.c index ade9fe2..fc34b26 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -137,7 +137,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } break; case QEMU_PSCI_0_2_FN_SYSTEM_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); /* QEMU reset and shutdown are async requests, but PSCI * mandates that we never return from the reset/shutdown * call, so power the CPU off now so it doesn't execute @@ -145,7 +145,7 @@ void arm_handle_psci_call(ARMCPU *cpu) */ goto cpu_off; case QEMU_PSCI_0_2_FN_SYSTEM_OFF: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); goto cpu_off; case QEMU_PSCI_0_1_FN_CPU_ON: case QEMU_PSCI_0_2_FN_CPU_ON: diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c index ee596c6..b769772 100644 --- a/target/i386/excp_helper.c +++ b/target/i386/excp_helper.c @@ -59,7 +59,7 @@ static int check_exception(CPUX86State *env, int intno, int *error_code, qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return EXCP_HLT; } #endif diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c index ef13015..7346931 100644 --- a/target/i386/hax-all.c +++ b/target/i386/hax-all.c @@ -540,14 +540,14 @@ static int hax_vcpu_hax_exec(CPUArchState *env) /* Guest state changed, currently only for shutdown */ case HAX_EXIT_STATECHANGE: fprintf(stdout, "VCPU shutdown request\n"); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); hax_vcpu_sync_state(env, 0); ret = 1; break; case HAX_EXIT_UNKNOWN_VMEXIT: fprintf(stderr, "Unknown VMX exit %x from guest\n", ht->_exit_reason); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); hax_vcpu_sync_state(env, 0); cpu_dump_state(cpu, stderr, fprintf, 0); ret = -1; @@ -578,7 +578,7 @@ static int hax_vcpu_hax_exec(CPUArchState *env) break; default: fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); hax_vcpu_sync_state(env, 0); cpu_dump_state(cpu, stderr, fprintf, 0); ret = 1; diff --git a/target/i386/helper.c b/target/i386/helper.c index f11cac6..ee7eff2 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -1212,7 +1212,7 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data) " triple fault\n", cs->cpu_index); qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } if (banks[1] & MCI_STATUS_VAL) { diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 011d4a5..49b6115 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2930,7 +2930,7 @@ int kvm_arch_process_async_events(CPUState *cs) if (env->exception_injected == EXCP08_DBLE) { /* this means triple fault */ - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); cs->exit_request = 1; return 0; } diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a6bcb47..9cb2123 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -995,6 +995,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) */ cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + /* Reset the reservation */ + env->reserve_addr = -1; + /* Context synchronizing: check if TCG TLB needs flush */ check_tlb_flush(env, false); } diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index a1bf2ba..a69005d 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -92,9 +92,10 @@ static void s390_cpu_initial_reset(CPUState *s) int i; s390_cpu_reset(s); - /* initial reset does not touch regs,fregs and aregs */ - memset(&env->fpc, 0, offsetof(CPUS390XState, end_reset_fields) - - offsetof(CPUS390XState, fpc)); + /* initial reset does not clear everything! */ + memset(&env->start_initial_reset_fields, 0, + offsetof(CPUS390XState, end_reset_fields) - + offsetof(CPUS390XState, start_initial_reset_fields)); /* architectured initial values for CR 0 and 14 */ env->cregs[0] = CR0_RESET; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 240b8a5..c74b419 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -88,6 +88,10 @@ typedef struct CPUS390XState { */ CPU_DoubleU vregs[32][2]; /* vector registers */ uint32_t aregs[16]; /* access registers */ + uint8_t riccb[64]; /* runtime instrumentation control */ + + /* Fields up to this point are not cleared by initial CPU reset */ + struct {} start_initial_reset_fields; uint32_t fpc; /* floating-point control register */ uint32_t cc_op; @@ -137,8 +141,6 @@ typedef struct CPUS390XState { uint64_t gbea; uint64_t pp; - uint8_t riccb[64]; - /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; @@ -1256,6 +1258,16 @@ static inline void s390_crypto_reset(void) } } +static inline bool s390_get_squash_mcss(void) +{ + if (object_property_get_bool(OBJECT(qdev_get_machine()), "s390-squash-mcss", + NULL)) { + return true; + } + + return false; +} + /* machine check interruption code */ /* subclasses */ diff --git a/target/s390x/helper.c b/target/s390x/helper.c index 9978490..4f8aadf 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -266,7 +266,7 @@ void load_psw(CPUS390XState *env, uint64_t mask, uint64_t addr) S390CPU *cpu = s390_env_get_cpu(env); if (s390_cpu_halt(cpu) == 0) { #ifndef CONFIG_USER_ONLY - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); #endif } } diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index 590bfa4..62a7771 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -244,6 +244,15 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb) case -EBUSY: cc = 2; break; + case -EFAULT: + /* + * TODO: + * I'm wondering whether there is something better + * to do for us here (like setting some device or + * subchannel status). + */ + program_interrupt(env, PGM_ADDRESSING, 4); + return; case 0: cc = 0; break; diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index fb10542..ba1e60f 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -1927,7 +1927,7 @@ static int handle_intercept(S390CPU *cpu) cpu_synchronize_state(cs); if (s390_cpu_halt(cpu) == 0) { if (is_special_wait_psw(cs)) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { qemu_system_guest_panicked(NULL); } @@ -1936,7 +1936,7 @@ static int handle_intercept(S390CPU *cpu) break; case ICPT_CPU_STOP: if (s390_cpu_set_state(CPU_STATE_STOPPED, cpu) == 0) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) { kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_DEF_ADDR, diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index 23ec52c..1b9f448 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -532,11 +532,11 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, break; #if !defined(CONFIG_USER_ONLY) case SIGP_RESTART: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); cpu_loop_exit(CPU(s390_env_get_cpu(env))); break; case SIGP_STOP: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); cpu_loop_exit(CPU(s390_env_get_cpu(env))); break; #endif diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 09afe13..eec9a4d 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -109,7 +109,7 @@ void sparc_cpu_do_interrupt(CPUState *cs) if (env->psret == 0) { if (cs->exception_index == 0x80 && env->def->features & CPU_FEATURE_TA0_SHUTDOWN) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { cpu_abort(cs, "Trap 0x%02x while interrupts disabled, Error state", cs->exception_index); diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp Binary files differindex a7dddf7..e508b4a 100644 --- a/tests/acpi-test-data/pc/SRAT.memhp +++ b/tests/acpi-test-data/pc/SRAT.memhp diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp Binary files differindex a7dddf7..e508b4a 100644 --- a/tests/acpi-test-data/q35/SRAT.memhp +++ b/tests/acpi-test-data/q35/SRAT.memhp diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out index dd879f1..1d5e28d 100644 --- a/tests/qemu-iotests/071.out +++ b/tests/qemu-iotests/071.out @@ -46,7 +46,7 @@ QMP_VERSION read failed: Input/output error {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Testing blkverify on existing block device === @@ -85,7 +85,7 @@ wrote 512/512 bytes at offset 0 read failed: Input/output error {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} QEMU_PROG: Failed to flush the L2 table cache: Input/output error QEMU_PROG: Failed to flush the refcount block cache: Input/output error diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out index 97df69d..2533c31 100644 --- a/tests/qemu-iotests/081.out +++ b/tests/qemu-iotests/081.out @@ -36,7 +36,7 @@ read 10485760/10485760 bytes at offset 0 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} == using quorum rewrite corrupted mode == diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index dc6baf9..59c5208 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -8,7 +8,7 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Duplicate ID === @@ -19,7 +19,7 @@ QMP_VERSION {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} {"error": {"class": "GenericError", "desc": "Duplicate node name"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === aio=native without O_DIRECT === @@ -29,7 +29,7 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Encrypted image === @@ -40,14 +40,14 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} Testing: QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Missing driver === @@ -58,6 +58,6 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/094.out b/tests/qemu-iotests/094.out index b66dc07..f52baff 100644 --- a/tests/qemu-iotests/094.out +++ b/tests/qemu-iotests/094.out @@ -7,5 +7,5 @@ Formatting 'TEST_DIR/source.IMGFMT', fmt=IMGFMT size=67108864 {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 67108864, "offset": 67108864, "speed": 0, "type": "mirror"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out index f52dc1a..851e214 100644 --- a/tests/qemu-iotests/117.out +++ b/tests/qemu-iotests/117.out @@ -7,7 +7,7 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} No errors were found on the image. read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/119.out b/tests/qemu-iotests/119.out index 58e7114..a8743b8 100644 --- a/tests/qemu-iotests/119.out +++ b/tests/qemu-iotests/119.out @@ -6,6 +6,6 @@ read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out index 9131b1b..1af1aeb 100644 --- a/tests/qemu-iotests/120.out +++ b/tests/qemu-iotests/120.out @@ -6,7 +6,7 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 0 diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out index 6c04456..0689b2b 100644 --- a/tests/qemu-iotests/140.out +++ b/tests/qemu-iotests/140.out @@ -10,5 +10,5 @@ read 65536/65536 bytes at offset 0 {"return": {}} can't open device nbd+unix:///drv?socket=TEST_DIR/nbd: No export with name 'drv' available {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out index d24ad20..0978b89 100644 --- a/tests/qemu-iotests/143.out +++ b/tests/qemu-iotests/143.out @@ -3,5 +3,5 @@ QA output created by 143 {"return": {}} can't open device nbd+unix:///no_such_export?socket=TEST_DIR/nbd: No export with name 'no_such_export' available {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out index 3af82ae..f96a564 100644 --- a/tests/qemu-iotests/156.out +++ b/tests/qemu-iotests/156.out @@ -34,7 +34,7 @@ read 65536/65536 bytes at offset 196608 {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 0f80194..c77343f 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -167,6 +167,11 @@ static void test_pair_jobs(int expected1, int expected2) block_job_start(job1); block_job_start(job2); + /* Release our reference now to trigger as many nice + * use-after-free bugs as possible. + */ + block_job_txn_unref(txn); + if (expected1 == -ECANCELED) { block_job_cancel(job1); } @@ -187,8 +192,6 @@ static void test_pair_jobs(int expected1, int expected2) g_assert_cmpint(result1, ==, expected1); g_assert_cmpint(result2, ==, expected2); - - block_job_txn_unref(txn); } static void test_pair_jobs_success(void) diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 740e740..23bdf1a 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -116,11 +116,11 @@ static void test_job_ids(void) job[1] = do_test_id(blk[1], "id0", false); /* But once job[0] finishes we can reuse its ID */ - block_job_unref(job[0]); + block_job_early_fail(job[0]); job[1] = do_test_id(blk[1], "id0", true); /* No job ID specified, defaults to the backend name ('drive1') */ - block_job_unref(job[1]); + block_job_early_fail(job[1]); job[1] = do_test_id(blk[1], NULL, true); /* Duplicate job ID */ @@ -133,9 +133,9 @@ static void test_job_ids(void) /* This one is valid */ job[2] = do_test_id(blk[2], "id_2", true); - block_job_unref(job[0]); - block_job_unref(job[1]); - block_job_unref(job[2]); + block_job_early_fail(job[0]); + block_job_early_fail(job[1]); + block_job_early_fail(job[2]); destroy_blk(blk[0]); destroy_blk(blk[1]); diff --git a/trace-events b/trace-events index e582d63..433865f 100644 --- a/trace-events +++ b/trace-events @@ -38,7 +38,7 @@ vm_state_notify(int running, int reason) "running %d reason %d" load_file(const char *name, const char *path) "name %s location %s" runstate_set(int new_state) "new state %d" system_wakeup_request(int reason) "reason=%d" -qemu_system_shutdown_request(void) "" +qemu_system_shutdown_request(int reason) "reason=%d" qemu_system_powerdown_request(void) "" # spice-qemu-char.c @@ -934,7 +934,7 @@ QemuCocoaView *cocoaView; { COCOA_DEBUG("QemuCocoaAppController: applicationWillTerminate\n"); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); exit(0); } @@ -837,7 +837,7 @@ static void sdl_refresh(DisplayChangeListener *dcl) case SDL_QUIT: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_MOUSEMOTION: @@ -568,7 +568,7 @@ static void handle_windowevent(SDL_Event *ev) case SDL_WINDOWEVENT_CLOSE: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_WINDOWEVENT_SHOWN: @@ -611,7 +611,7 @@ void sdl2_poll_events(struct sdl2_console *scon) case SDL_QUIT: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_MOUSEMOTION: diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index d8183f7..b39ae74 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -1338,12 +1338,14 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp) SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy) { - SocketAddress *addr = g_new(SocketAddress, 1); + SocketAddress *addr; if (!addr_legacy) { return NULL; } + addr = g_new(SocketAddress, 1); + switch (addr_legacy->type) { case SOCKET_ADDRESS_LEGACY_KIND_INET: addr->type = SOCKET_ADDRESS_TYPE_INET; @@ -1436,6 +1436,9 @@ static int usb_parse(const char *cmdline) void hmp_usb_add(Monitor *mon, const QDict *qdict) { const char *devname = qdict_get_str(qdict, "devname"); + + error_report("usb_add is deprecated, please use device_add instead"); + if (usb_device_add(devname) < 0) { error_report("could not add USB device '%s'", devname); } @@ -1444,6 +1447,9 @@ void hmp_usb_add(Monitor *mon, const QDict *qdict) void hmp_usb_del(Monitor *mon, const QDict *qdict) { const char *devname = qdict_get_str(qdict, "devname"); + + error_report("usb_del is deprecated, please use device_del instead"); + if (usb_device_del(devname) < 0) { error_report("could not delete USB device '%s'", devname); } @@ -1598,8 +1604,9 @@ void vm_state_notify(int running, RunState state) } } -static int reset_requested; -static int shutdown_requested, shutdown_signal = -1; +static ShutdownCause reset_requested; +static ShutdownCause shutdown_requested; +static int shutdown_signal; static pid_t shutdown_pid; static int powerdown_requested; static int debug_requested; @@ -1613,24 +1620,24 @@ static NotifierList wakeup_notifiers = NOTIFIER_LIST_INITIALIZER(wakeup_notifiers); static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE); -int qemu_shutdown_requested_get(void) +ShutdownCause qemu_shutdown_requested_get(void) { return shutdown_requested; } -int qemu_reset_requested_get(void) +ShutdownCause qemu_reset_requested_get(void) { return reset_requested; } static int qemu_shutdown_requested(void) { - return atomic_xchg(&shutdown_requested, 0); + return atomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE); } static void qemu_kill_report(void) { - if (!qtest_driver() && shutdown_signal != -1) { + if (!qtest_driver() && shutdown_signal) { if (shutdown_pid == 0) { /* This happens for eg ^C at the terminal, so it's worth * avoiding printing an odd message in that case. @@ -1644,18 +1651,19 @@ static void qemu_kill_report(void) shutdown_cmd ? shutdown_cmd : "<unknown process>"); g_free(shutdown_cmd); } - shutdown_signal = -1; + shutdown_signal = 0; } } -static int qemu_reset_requested(void) +static ShutdownCause qemu_reset_requested(void) { - int r = reset_requested; + ShutdownCause r = reset_requested; + if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) { - reset_requested = 0; + reset_requested = SHUTDOWN_CAUSE_NONE; return r; } - return false; + return SHUTDOWN_CAUSE_NONE; } static int qemu_suspend_requested(void) @@ -1687,7 +1695,10 @@ static int qemu_debug_requested(void) return r; } -void qemu_system_reset(bool report) +/* + * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE. + */ +void qemu_system_reset(ShutdownCause reason) { MachineClass *mc; @@ -1700,8 +1711,9 @@ void qemu_system_reset(bool report) } else { qemu_devices_reset(); } - if (report) { - qapi_event_send_reset(&error_abort); + if (reason) { + qapi_event_send_reset(shutdown_caused_by_guest(reason), + &error_abort); } cpu_synchronize_all_post_reset(); } @@ -1719,7 +1731,7 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) if (!no_shutdown) { qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, !!info, info, &error_abort); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC); } if (info) { @@ -1736,12 +1748,12 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) } } -void qemu_system_reset_request(void) +void qemu_system_reset_request(ShutdownCause reason) { if (no_reboot) { - shutdown_requested = 1; + shutdown_requested = reason; } else { - reset_requested = 1; + reset_requested = reason; } cpu_stop_current(); qemu_notify_event(); @@ -1808,15 +1820,15 @@ void qemu_system_killed(int signal, pid_t pid) /* Cannot call qemu_system_shutdown_request directly because * we are in a signal handler. */ - shutdown_requested = 1; + shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL; qemu_notify_event(); } -void qemu_system_shutdown_request(void) +void qemu_system_shutdown_request(ShutdownCause reason) { - trace_qemu_system_shutdown_request(); - replay_shutdown_request(); - shutdown_requested = 1; + trace_qemu_system_shutdown_request(reason); + replay_shutdown_request(reason); + shutdown_requested = reason; qemu_notify_event(); } @@ -1847,24 +1859,29 @@ void qemu_system_debug_request(void) static bool main_loop_should_exit(void) { RunState r; + ShutdownCause request; + if (qemu_debug_requested()) { vm_stop(RUN_STATE_DEBUG); } if (qemu_suspend_requested()) { qemu_system_suspend(); } - if (qemu_shutdown_requested()) { + request = qemu_shutdown_requested(); + if (request) { qemu_kill_report(); - qapi_event_send_shutdown(&error_abort); + qapi_event_send_shutdown(shutdown_caused_by_guest(request), + &error_abort); if (no_shutdown) { vm_stop(RUN_STATE_SHUTDOWN); } else { return true; } } - if (qemu_reset_requested()) { + request = qemu_reset_requested(); + if (request) { pause_all_vcpus(); - qemu_system_reset(VMRESET_REPORT); + qemu_system_reset(request); resume_all_vcpus(); if (!runstate_check(RUN_STATE_RUNNING) && !runstate_check(RUN_STATE_INMIGRATE)) { @@ -1873,7 +1890,7 @@ static bool main_loop_should_exit(void) } if (qemu_wakeup_requested()) { pause_all_vcpus(); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); notifier_list_notify(&wakeup_notifiers, &wakeup_reason); wakeup_reason = QEMU_WAKEUP_REASON_NONE; resume_all_vcpus(); @@ -3759,6 +3776,8 @@ int main(int argc, char **argv, char **envp) qemu_opts_parse_noisily(olist, "usb=on", false); break; case QEMU_OPTION_usbdevice: + error_report("'-usbdevice' is deprecated, please use " + "'-device usb-...' instead"); olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "usb=on", false); add_device_config(DEV_USB, optarg); @@ -4697,7 +4716,7 @@ int main(int argc, char **argv, char **envp) reading from the other reads, because timer polling functions query clock values from the log. */ replay_checkpoint(CHECKPOINT_RESET); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); register_global_state(); if (replay_mode != REPLAY_MODE_NONE) { replay_vmstate_init(); |