diff options
258 files changed, 9724 insertions, 3509 deletions
diff --git a/.shippable.yml b/.shippable.yml index 1a1fd7a..653bd75 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -5,6 +5,8 @@ env: TARGET_LIST=arm-softmmu,arm-linux-user - IMAGE=debian-arm64-cross TARGET_LIST=aarch64-softmmu,aarch64-linux-user + - IMAGE=debian-s390x-cross + TARGET_LIST=s390x-softmmu,s390x-linux-user build: pre_ci: - make docker-image-${IMAGE} @@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, }; +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(parent->backing_file, sizeof(parent->backing_file), + backing_hd->filename); + pstrcpy(parent->backing_format, sizeof(parent->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. + */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + /* * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS @@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } -static const BdrvChildRole child_backing = { +const BdrvChildRole child_backing = { + .get_parent_desc = bdrv_child_get_parent_desc, + .attach = bdrv_backing_attach, + .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + bdrv_is_read_only(bs)) + { + error_setg(errp, "Block node is read-only"); + return -EPERM; + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->role->get_parent_desc) { + return c->role->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +static char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } + }; + + char *result = g_strdup(""); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + char *old = result; + result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); + g_free(old); + } + } + + return result; +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set, + * this old reference is ignored in the calculations; this allows checking + * permission updates for an existing reference. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). */ +static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, + uint64_t new_shared_perm, + BdrvChild *ignore_child, Error **errp) +{ + BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore_child) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). */ +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + return bdrv_check_update_perm(c->bs, perm, shared, c, errp); +} + +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +void bdrv_child_abort_perm_update(BdrvChild *c) +{ + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + int ret; + + ret = bdrv_child_check_perm(c, perm, shared, errp); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + + return 0; +} + +#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED \ + | BLK_PERM_RESIZE) +#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) + +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | + (c->perm & DEFAULT_PERM_UNCHANGED); + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | + (c->shared_perm & DEFAULT_PERM_UNCHANGED); +} + +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + bool backing = (role == &child_backing); + assert(role == &child_backing || role == &child_file); + + if (!backing) { + /* Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters */ + bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + + /* Format drivers may touch metadata even if the guest doesn't write */ + if (!bdrv_is_read_only(bs)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* bs->file always needs to be consistent because of the metadata. We + * can never allow other users to resize or write to it. */ + perm |= BLK_PERM_CONSISTENT_READ; + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } else { + /* We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. */ + /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + } + + *nperm = perm; + *nshared = shared; +} + +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, + bool check_new_perm) { BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; if (old_bs) { if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } + if (child->role->detach) { + child->role->detach(child); + } QLIST_REMOVE(child, next_parent); + + /* Update permissions for old node. This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + bdrv_check_perm(old_bs, perm, shared_perm, &error_abort); + bdrv_set_perm(old_bs, perm, shared_perm); } child->bs = new_bs; @@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) if (new_bs->quiesce_counter && child->role->drained_begin) { child->role->drained_begin(child); } + + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + if (check_new_perm) { + bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); + } + bdrv_set_perm(new_bs, perm, shared_perm); + + if (child->role->attach) { + child->role->attach(child); + } } } BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque) + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) { - BdrvChild *child = g_new(BdrvChild, 1); + BdrvChild *child; + int ret; + + ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(child_bs); + return NULL; + } + + child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .role = child_role, - .opaque = opaque, + .bs = NULL, + .name = g_strdup(child_name), + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, }; - bdrv_replace_child(child, child_bs); + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs, false); return child; } @@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + Error **errp) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, - parent_bs); + BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); + + child = bdrv_root_attach_child(child_bs, child_name, child_role, + perm, shared_perm, parent_bs, errp); + if (child == NULL) { + return NULL; + } + QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } @@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL); + bdrv_replace_child(child, NULL, false); g_free(child->name); g_free(child); @@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). */ -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) { if (backing_hd) { bdrv_ref(backing_hd); } if (bs->backing) { - assert(bs->backing_blocker); - bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); bdrv_unref_child(bs, bs->backing); - } else if (backing_hd) { - error_setg(&bs->backing_blocker, - "node is used as backing hd of '%s'", - bdrv_get_device_or_node_name(bs)); } if (!backing_hd) { - error_free(bs->backing_blocker); - bs->backing_blocker = NULL; bs->backing = NULL; goto out; } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); - bs->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); - pstrcpy(bs->backing_format, sizeof(bs->backing_format), - backing_hd->drv ? backing_hd->drv->format_name : ""); - bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit or stream */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, - bs->backing_blocker); - /* - * We do backup in 3 ways: - * 1. drive backup - * The target bs is new opened, and the source is top BDS - * 2. blockdev backup - * Both the source and the target are top BDSes. - * 3. internal backup(used for block replication) - * Both the source and the target are backing file - * - * In case 1 and 2, neither the source nor the target is the backing file. - * In case 3, we will block the top BDS, so there is only one block job - * for the top BDS and its backing chain. - */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, - bs->backing_blocker); + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, + errp); + if (!bs->backing) { + bdrv_unref(backing_hd); + } + out: bdrv_refresh_limits(bs, NULL); } @@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ - bdrv_set_backing_hd(bs, backing_hd); + bdrv_set_backing_hd(bs, backing_hd, &local_err); bdrv_unref(backing_hd); + if (local_err) { + ret = -EINVAL; + goto free_exit; + } qdict_del(parent_options, bdref_key); @@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { + BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - return bdrv_attach_child(parent, bs, bdref_key, child_role); + c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); + if (!c) { + bdrv_unref(bs); + return NULL; + } + + return c; } static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, @@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, * call bdrv_unref() on it), so in order to be able to return one, we have * to increase bs_snapshot's refcount here */ bdrv_ref(bs_snapshot); - bdrv_append(bs_snapshot, bs); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } g_free(tmp_filename); return bs_snapshot; @@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } if (file_bs != NULL) { - file = blk_new(); - blk_insert_bs(file, file_bs); + file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); + if (local_err) { + goto fail; + } qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file_bs))); @@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv->bdrv_close(bs); bs->drv = NULL; - bdrv_set_backing_hd(bs, NULL); + bdrv_set_backing_hd(bs, NULL, &error_abort); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); @@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from, BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role->stay_at_node) { + continue; + } if (c->role == &child_backing) { - /* @from is generally not allowed to be a backing file, except for - * when @to is the overlay. In that case, @from may not be replaced - * by @to as @to's backing node. */ + /* If @from is a backing file of @to, ignore the child to avoid + * creating a loop. We only want to change the pointer of other + * parents. */ QLIST_FOREACH(to_c, &to->children, next) { if (to_c == c) { break; @@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from, } } - assert(c->role != &child_backing); bdrv_ref(to); - bdrv_replace_child(c, to); + /* FIXME Are we sure that bdrv_replace_child() can't run into + * &error_abort because of permissions? */ + bdrv_replace_child(c, to, true); bdrv_unref(from); } } @@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from, * parents of bs_top after bdrv_append() returns. If the caller needs to keep a * reference of its own, it must call bdrv_ref(). */ -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) { - assert(!bdrv_requests_pending(bs_top)); - assert(!bdrv_requests_pending(bs_new)); + Error *local_err = NULL; - bdrv_ref(bs_top); + assert(!atomic_read(&bs_top->in_flight)); + assert(!atomic_read(&bs_new->in_flight)); + + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } change_parent_backing_link(bs_top, bs_new); - bdrv_set_backing_hd(bs_new, bs_top); - bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the * additional reference any more. */ +out: bdrv_unref(bs_new); } @@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { BlockDriverState *new_top_bs = NULL; + Error *local_err = NULL; int ret = -EIO; if (!top->drv || !base->drv) { @@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, if (ret) { goto exit; } - bdrv_set_backing_hd(new_top_bs, base); + + bdrv_set_backing_hd(new_top_bs, base, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); + goto exit; + } ret = 0; exit: @@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset) BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; int ret; + + assert(child->perm & BLK_PERM_RESIZE); + if (!drv) return -ENOMEDIUM; if (!drv->bdrv_truncate) diff --git a/block/backup.c b/block/backup.c index fe010e7..d1ab617 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job = block_job_create(job_id, &backup_job_driver, bs, speed, - creation_flags, cb, opaque, errp); + /* job->common.len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, + speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->target = blk_new(); - blk_insert_bs(job->target, target); + /* The target must match the source in size, so no resize here either */ + job->target = blk_new(BLK_PERM_WRITE, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(job->target, target, errp); + if (ret < 0) { + goto error; + } job->on_source_error = on_source_error; job->on_target_error = on_target_error; @@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - block_job_add_bdrv(&job->common, target); + /* Required permissions are already taken with target's blk_new() */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); job->common.len = len; block_job_txn_add_job(txn, &job->common); diff --git a/block/blkdebug.c b/block/blkdebug.c index 6117ce5..67e8024 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c index cfc8c5b..e110211 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_file_open = blkreplay_open, .bdrv_close = blkreplay_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 43a940c..9a1e21c 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, diff --git a/block/block-backend.c b/block/block-backend.c index 492e71e..daa7908 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -59,6 +59,9 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + uint64_t perm; + uint64_t shared_perm; + bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); +static char *blk_get_attached_dev_id(BlockBackend *blk); /* All BlockBackends */ static QTAILQ_HEAD(, BlockBackend) block_backends = @@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + static const char *blk_root_get_name(BdrvChild *child) { return blk_name(child->opaque); @@ -110,6 +133,7 @@ static const BdrvChildRole child_root = { .change_media = blk_root_change_media, .resize = blk_root_resize, .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, @@ -117,15 +141,23 @@ static const BdrvChildRole child_root = { /* * Create a new BlockBackend with a reference count of one. - * Store an error through @errp on failure, unless it's null. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(void) +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->perm = perm; + blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; + uint64_t perm; + + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. + * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. */ + perm = BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } - blk = blk_new(); + blk = blk_new(perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + perm, BLK_PERM_ALL, blk, &error_abort); return blk; } @@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk) /* * Associates a new BlockDriverState with @blk. */ -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->perm, blk->shared_perm, blk, errp); + if (blk->root == NULL) { + return -EPERM; + } bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { throttle_timers_attach_aio_context( &blk->public.throttle_timers, bdrv_get_aio_context(bs)); } + + return 0; +} + +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; } static int blk_do_attach_dev(BlockBackend *blk, void *dev) @@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, /* * Notify @blk's attached device model of media change. - * If @load is true, notify of media load. - * Else, notify of media eject. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * * Also send DEVICE_TRAY_MOVED events as appropriate. */ -void blk_dev_change_media_cb(BlockBackend *blk, bool load) +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + Error *local_err = NULL; assert(!blk->legacy_dev); tray_was_open = blk_dev_is_tray_open(blk); - blk->dev_ops->change_media_cb(blk->dev_opaque, load); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { @@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) static void blk_root_change_media(BdrvChild *child, bool load) { - blk_dev_change_media_cb(child->opaque, load); + blk_dev_change_media_cb(child->opaque, load, NULL); } /* diff --git a/block/bochs.c b/block/bochs.c index 7dd2ac4..516da56 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = bochs_refresh_limits, .bdrv_co_preadv = bochs_co_preadv, .bdrv_close = bochs_close, diff --git a/block/cloop.c b/block/cloop.c index 877c9b0..a6c7b9d 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = cloop_refresh_limits, .bdrv_co_preadv = cloop_co_preadv, .bdrv_close = cloop_close, diff --git a/block/commit.c b/block/commit.c index c284e85..22a0a4d 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; + BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; @@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); + BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. */ + blk_unref(s->base); if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + s->backing_file_str); + } else if (overlay_bs) { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + remove_commit_top_bs = true; } /* restore base open flags here if appropriate (e.g., change the base back @@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); - blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. Do this as the final step so that + * the 'consistent read' permission can be granted. */ + if (remove_commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } } static void coroutine_fn commit_run(void *opaque) @@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = { .start = commit_run, }; +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, +}; + void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp) + const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, int orig_base_flags; BlockDriverState *iter; BlockDriverState *overlay_bs; + BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; + int ret; assert(top != bs); if (top == base) { @@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - s = block_job_create(job_id, &commit_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } @@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs, bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - block_job_unref(&s->common); - return; + goto fail; } } + /* Insert commit_top block node above top, so we can block consistent read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); + if (commit_top_bs == NULL) { + goto fail; + } + + bdrv_set_backing_hd(commit_top_bs, top, &error_abort); + bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort); + + s->commit_top_bs = commit_top_bs; + bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); - for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + for (iter = top; iter != base; iter = backing_bs(iter)) { + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } + } + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } + /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string, but if it's the root node then - * don't block it again */ - if (bs != overlay_bs) { - block_job_add_bdrv(&s->common, overlay_bs); + * update the backing image string. */ + ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, + BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } - s->base = blk_new(); - blk_insert_bs(s->base, base); + s->base = blk_new(BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_RESIZE, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } - s->top = blk_new(); - blk_insert_bs(s->top, top); + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } s->active = bs; @@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, trace_commit_start(bs, base, top, s); block_job_start(&s->common); + return; + +fail: + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + if (commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } + block_job_unref(&s->common); } @@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs, int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; uint8_t *buf = NULL; + Error *local_err = NULL; if (!drv) return -ENOMEDIUM; @@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(); - blk_insert_bs(src, bs); + src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); - backing = blk_new(); - blk_insert_bs(backing, bs->backing->bs); + ret = blk_insert_bs(src, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + /* Insert commit_top block node above backing, so we can write to it */ + backing_file_bs = backing_bs(bs); + + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } length = blk_getlength(src); if (length < 0) { @@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs) ro_cleanup: qemu_vfree(buf); - blk_unref(src); blk_unref(backing); + if (backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); + } + bdrv_unref(commit_top_bs); + blk_unref(src); if (ro) { /* ignoring error return here */ diff --git a/block/crypto.c b/block/crypto.c index 7cb2ff2..4a20388 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_probe = block_crypto_probe_luks, .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = block_crypto_create_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/dmg.c b/block/dmg.c index 8e387cd..a7d25fc 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = { .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = dmg_co_preadv, .bdrv_close = dmg_close, }; @@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); } -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { + BlockDriverState *bs = child->bs; + /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up * modifying the image file. This is critical for zero-copy guest I/O @@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, size_t skip_bytes; int ret; + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ @@ -1001,10 +1005,11 @@ err: * handles copy on read, zeroing after EOF, and fragmentation of large * reads; any other features must be implemented by the caller. */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; int ret = 0; uint64_t bytes_remaining = bytes; @@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } } @@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); @@ -1306,10 +1311,11 @@ fail: * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. */ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; bool waited; int ret; @@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(child->perm & BLK_PERM_WRITE); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); @@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } -static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, BdrvRequestFlags flags, BdrvTrackedRequest *req) { + BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; @@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); if (ret < 0) { @@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, if (bytes >= align) { /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); - ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, NULL, flags); if (ret < 0) { goto fail; @@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, req, offset, align, + ret = bdrv_aligned_preadv(child, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); - ret = bdrv_aligned_pwritev(bs, req, offset, align, align, + ret = bdrv_aligned_pwritev(child, req, offset, align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); } fail: @@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { - ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&head_qiov, &head_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; @@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); + ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), + align, align, &tail_qiov, 0); if (ret < 0) { goto fail; } @@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, bytes = ROUND_UP(bytes, align); } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); diff --git a/block/mirror.c b/block/mirror.c index 1b34b36..57f26c3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -38,7 +38,10 @@ typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *source; BlockDriverState *base; + /* The name of the graph node to replace */ char *replaces; /* The BDS to replace */ @@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = blk_bs(s->common.blk); + BlockDriverState *source = s->source; int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); + bdrv_ref(mirror_top_bs); + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + } + } + } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_drained_begin(target_bs); bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - - /* We just changed the BDS the job BB refers to */ - blk_remove_bs(job->blk); - blk_insert_bs(job->blk, src); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque) g_free(s->replaces); blk_unref(s->target); s->target = NULL; + + /* Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. */ + block_job_remove_all_bdrv(job); + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_in_backing_chain() calls), so switch the BB back so the + * cleanup does the right thing. We don't need any permissions any more + * now. */ + blk_remove_bs(job->blk); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, mirror_top_bs, &error_abort); + block_job_completed(&s->common, data->ret); + g_free(data); bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); bdrv_unref(src); } @@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t sector_num, end; BlockDriverState *base = s->base; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; @@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - BlockDriverState *src, *target; + BlockDriverState *target; - src = blk_bs(job->blk); target = blk_bs(s->target); if (!s->synced) { @@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target) != backing) { - bdrv_set_backing_hd(target, backing); - } - } - s->should_complete = true; block_job_enter(&s->common); } @@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->backing->bs); +} + +static int64_t coroutine_fn bdrv_mirror_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + return bdrv_co_pdiscard(bs->backing->bs, offset, count); +} + +static void bdrv_mirror_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_close = bdrv_mirror_top_close, + .bdrv_child_perm = bdrv_mirror_top_child_perm, +}; + static void mirror_start_job(const char *job_id, BlockDriverState *bs, int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, @@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, void *opaque, Error **errp, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete) + bool auto_complete, const char *filter_node_name) { MirrorBlockJob *s; + BlockDriverState *mirror_top_bs; + bool target_graph_mod; + bool target_is_backing; + Error *local_err = NULL; + int ret; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, creation_flags, - cb, opaque, errp); - if (!s) { + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. */ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); + if (mirror_top_bs == NULL) { + return; + } + mirror_top_bs->total_sectors = bs->total_sectors; + + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs, &local_err); + bdrv_drained_end(bs); + + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); return; } - s->target = blk_new(); - blk_insert_bs(s->target, target); + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + bdrv_unref(mirror_top_bs); + if (!s) { + goto fail; + } + s->source = bs; + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + target_is_backing = bdrv_chain_contains(bs, target); + target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), + BLK_PERM_WRITE_UNCHANGED | + (target_is_backing ? BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE | + BLK_PERM_GRAPH_MOD : 0)); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + goto fail; + } s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; @@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - block_job_add_bdrv(&s->common, target); + /* Required permissions are already taken with blk_new() */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ - if (bdrv_chain_contains(bs, target)) { + if (target_is_backing) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } } trace_mirror_start(bs, s, opaque); block_job_start(&s->common); + return; + +fail: + if (s) { + g_free(s->replaces); + blk_unref(s->target); + block_job_unref(&s->common); + } + + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp) + bool unmap, const char *filter_node_name, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, errp, - &mirror_job_driver, is_none_mode, base, false); + &mirror_job_driver, is_none_mode, base, false, + filter_node_name); } void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, BlockCompletionFunc *cb, void *opaque, Error **errp, bool auto_complete) { @@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, - &commit_active_job_driver, false, base, auto_complete); + &commit_active_job_driver, false, base, auto_complete, + filter_node_name); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/parallels.c b/block/parallels.c index b2ec09f..19935e2 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } file = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (file == NULL) { error_propagate(errp, local_err); return -EIO; @@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = { .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, diff --git a/block/qcow.c b/block/qcow.c index 038b05a..9d6ac83 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } qcow_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (qcow_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/qcow2.c b/block/qcow2.c index 21e6142..6a92d2e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, uint64_t cluster_size = s->cluster_size; bool encrypt; int refcount_bits = s->refcount_bits; + Error *local_err = NULL; int ret; QemuOptDesc *desc = opts->list->desc; Qcow2AmendHelperCBInfo helper_cb_info; @@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(); - blk_insert_bs(blk, bs); + BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + blk_unref(blk); + return ret; + } + ret = blk_truncate(blk, new_size); blk_unref(blk); - if (ret < 0) { return ret; } @@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qed.c b/block/qed.c index 62a0a09..5ec7fd8 100644 --- a/block/qed.c +++ b/block/qed.c @@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, diff --git a/block/quorum.c b/block/quorum.c index 86e2072..40205fb 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + if (child == NULL) { + s->next_child_index--; + bdrv_unref(child_bs); + goto out; + } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; +out: bdrv_drained_end(bs); } @@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, + .bdrv_child_perm = bdrv_filter_default_perms, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/block/raw-format.c b/block/raw-format.c index ce34d1b..86fbc65 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -467,6 +467,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_create = &raw_create, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/replication.c b/block/replication.c index eff85c7..22f170f 100644 --- a/block/replication.c +++ b/block/replication.c @@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - replication_done, bs, errp, true); + NULL, replication_done, bs, errp, true); break; default: aio_context_release(aio_context); @@ -660,6 +660,7 @@ BlockDriver bdrv_replication = { .bdrv_open = replication_open, .bdrv_close = replication_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = replication_getlength, .bdrv_co_readv = replication_co_readv, diff --git a/block/sheepdog.c b/block/sheepdog.c index 860ba61..7434710 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); if (blk == NULL) { ret = -EIO; goto out_with_err_set; diff --git a/block/stream.c b/block/stream.c index 1523ba7..0113710 100644 --- a/block/stream.c +++ b/block/stream.c @@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque) StreamCompleteData *data = opaque; BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; + Error *local_err = NULL; if (!block_job_is_cancelled(&s->common) && data->reached_end && data->ret == 0) { @@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + goto out; + } } +out: /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen(bs, s->bs_flags, NULL); } @@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - s = block_job_create(job_id, &stream_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); - if (!s) { - return; - } - /* Make sure that the image is opened in read-write mode */ orig_bs_flags = bdrv_get_flags(bs); if (!(orig_bs_flags & BDRV_O_RDWR)) { if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { - block_job_unref(&s->common); return; } } - /* Block all intermediate nodes between bs and base, because they - * will disappear from the chain after this operation */ + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. The streaming job reads + * every block only once, assuming that it doesn't change, so block writes + * and resizes. */ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + &error_abort); } s->base = base; @@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->on_error = on_error; trace_stream_start(bs, base, s); block_job_start(&s->common); + return; + +fail: + if (orig_bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } } diff --git a/block/vdi.c b/block/vdi.c index 18b4773..9b4f70e 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = vdi_co_get_block_status, diff --git a/block/vhdx.c b/block/vhdx.c index 9918ee9..052a753 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_open = vhdx_open, .bdrv_close = vhdx_close, .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, .bdrv_create = vhdx_create, diff --git a/block/vmdk.c b/block/vmdk.c index 9d68ec5..a9bd22b 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (new_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_open = vmdk_open, .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, diff --git a/block/vpc.c b/block/vpc.c index d0df2a1..f591d4b 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vpc_create, .bdrv_co_preadv = vpc_co_preadv, diff --git a/block/vvfat.c b/block/vvfat.c index 7f230be..aa61c32 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) &error_abort); *(void**) backing->opaque = s; - bdrv_set_backing_hd(s->bs, backing); + bdrv_set_backing_hd(s->bs, backing, &error_abort); bdrv_unref(backing); return 0; @@ -3052,6 +3052,27 @@ err: return ret; } +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + assert(c == s->qcow || role == &child_backing); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + static void vvfat_close(BlockDriverState *bs) { BDRVVVFATState *s = bs->opaque; @@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_file_open = vvfat_open, .bdrv_refresh_limits = vvfat_refresh_limits, .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, @@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common, if (!state->new_bs->drv->supports_backing) { error_setg(errp, "The snapshot does not support backing images"); + return; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common) bdrv_set_aio_context(state->new_bs, state->aio_context); - /* This removes our old bs and adds the new bs */ - bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_unref(state->new_bs); + if (state->new_bs->backing) { + bdrv_replace_in_backing_chain(state->new_bs, state->old_bs); + } } } @@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common) if (state->aio_context) { bdrv_drained_end(state->old_bs); aio_context_release(state->aio_context); + bdrv_unref(state->new_bs); } } @@ -2311,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id, } if (!locked || force) { - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } if (locked && !force) { @@ -2349,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, Error **errp) { BlockBackend *blk; + Error *local_err = NULL; device = has_device ? device : NULL; id = has_id ? id : NULL; @@ -2372,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, return; } - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } void qmp_x_blockdev_remove_medium(bool has_device, const char *device, @@ -2425,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device, * called at all); therefore, the medium needs to be ejected here. * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } out: @@ -2435,7 +2452,9 @@ out: static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + Error *local_err = NULL; bool has_device; + int ret; /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); @@ -2455,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, return; } - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } if (!blk_dev_has_tray(blk)) { /* For tray-less devices, blockdev-close-tray is a no-op (or may not be @@ -2463,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, * slot here. * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. true). */ - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } } } @@ -2890,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto out; + } /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); @@ -3014,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3029,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, if (!has_speed) { speed = 0; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for @@ -3103,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, - BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, - &local_err, false); + BLOCK_JOB_DEFAULT, speed, on_error, + filter_node_name, NULL, NULL, &local_err, false); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3112,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, has_backing_file ? backing_file : NULL, - &local_err); + filter_node_name, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); @@ -3348,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_on_target_error, BlockdevOnError on_target_error, bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { @@ -3369,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_unmap) { unmap = true; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3398,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, errp); + on_source_error, on_target_error, unmap, filter_node_name, + errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3536,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_source_error, arg->on_source_error, arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, + false, NULL, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3554,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3585,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_source_error, on_source_error, has_on_target_error, on_target_error, true, true, + has_filter_node_name, filter_node_name, &local_err); error_propagate(errp, local_err); @@ -55,6 +55,19 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +void block_job_remove_all_bdrv(BlockJob *job) +{ + GSList *l; + for (l = job->nodes; l; l = l->next) { + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + } + g_slist_free(job->nodes); + job->nodes = NULL; +} + +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) { - job->nodes = g_slist_prepend(job->nodes, bs); + BdrvChild *c; + + c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, + job, errp); + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); + + return 0; } void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; BlockJob *job; + int ret; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); @@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, bs); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - for (l = job->nodes; l; l = l->next) { - bs = l->data; - bdrv_op_unblock_all(bs, job->blocker); - bdrv_unref(bs); - } - g_slist_free(job->nodes); + block_job_remove_all_bdrv(job); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index fdf4089..1e3bd2b 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y CONFIG_A9MPCORE=y CONFIG_A15MPCORE=y +CONFIG_ARM_V7M=y + CONFIG_ARM_GIC=y CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) CONFIG_ARM_TIMER=y diff --git a/docs/mach-virt-graphical.cfg b/docs/mach-virt-graphical.cfg new file mode 100644 index 0000000..0fdf684 --- /dev/null +++ b/docs/mach-virt-graphical.cfg @@ -0,0 +1,281 @@ +# mach-virt - VirtIO guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-aarch64 \ +# -nodefaults \ +# -readconfig mach-virt-graphical.cfg \ +# -cpu host +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through a graphical console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals, +# such as the PL011 UART, plus a PCI Express Root Bus; the +# user will then have to explicitly add further devices. +# +# The PCI Express Root Bus shows up in the guest as: +# +# 00:00.0 Host bridge +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00:01.0 Display controller +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# 03:00.0 USB controller +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the virt machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line, but we can configure the guest to use the +# same GIC version as the host. + +[machine] + type = "virt" + accel = "kvm" + gic-version = "host" + +[memory] + size = "1024" + + +# Firmware configuration +# ========================================================= +# +# There are two parts to the firmware: a read-only image +# containing the executable code, which is shared between +# guests, and a read/write variable store that is owned +# by one specific guest, exclusively, and is used to +# record information such as the UEFI boot order. +# +# For any new guest, its permanent, private variable store +# should initially be copied from the template file +# provided along with the firmware binary. +# +# Depending on the OS distribution you're using on the +# host, the name of the package containing the firmware +# binary and variable store template, as well as the paths +# to the files themselves, will be different. For example: +# +# Fedora +# edk2-aarch64 (pkg) +# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin) +# /usr/share/edk2/aarch64/vars-template-pflash.raw (var) +# +# RHEL +# AAVMF (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) +# +# Debian/Ubuntu +# qemu-efi (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) + +[drive "uefi-binary"] + file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "0" + readonly = "on" + +[drive "uefi-varstore"] + file = "guest_VARS.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "1" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" + + +# USB controller (and input devices) +# ========================================================= +# +# We add a virtualization-friendly USB 3.0 controller and +# a USB keyboard / USB tablet combo so that graphical +# guests can be controlled appropriately. + +[device "usb"] + driver = "nec-usb-xhci" + bus = "pcie.3" + addr = "00.0" + +[device "keyboard"] + driver = "usb-kbd" + bus = "usb.0" + +[device "tablet"] + driver = "usb-tablet" + bus = "usb.0" + + +# Display controller +# ========================================================= +# +# We use virtio-gpu because the legacy VGA framebuffer is +# very troublesome on aarch64, and virtio-gpu is the only +# video device that doesn't implement it. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. + +[device "video"] + driver = "virtio-gpu" + bus = "pcie.0" + addr = "01.0" diff --git a/docs/mach-virt-serial.cfg b/docs/mach-virt-serial.cfg new file mode 100644 index 0000000..aee9f1c --- /dev/null +++ b/docs/mach-virt-serial.cfg @@ -0,0 +1,243 @@ +# mach-virt - VirtIO guest (serial console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-aarch64 \ +# -nodefaults \ +# -readconfig mach-virt-serial.cfg \ +# -display none -serial mon:stdio \ +# -cpu host +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through the serial console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals, +# such as the PL011 UART, plus a PCI Express Root Bus; the +# user will then have to explicitly add further devices. +# +# The PCI Express Root Bus shows up in the guest as: +# +# 00:00.0 Host bridge +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# +# More information about these devices is available below. +# +# We use '-display none' to prevent QEMU from creating a +# graphical display window, which would serve no use in +# this specific configuration, and '-serial mon:stdio' to +# multiplex the guest's serial console and the QEMU monitor +# to the host's stdio; use 'Ctrl+A h' to learn how to +# switch between the two and more. + + +# Machine options +# ========================================================= +# +# We use the virt machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line, but we can configure the guest to use the +# same GIC version as the host. + +[machine] + type = "virt" + accel = "kvm" + gic-version = "host" + +[memory] + size = "1024" + + +# Firmware configuration +# ========================================================= +# +# There are two parts to the firmware: a read-only image +# containing the executable code, which is shared between +# guests, and a read/write variable store that is owned +# by one specific guest, exclusively, and is used to +# record information such as the UEFI boot order. +# +# For any new guest, its permanent, private variable store +# should initially be copied from the template file +# provided along with the firmware binary. +# +# Depending on the OS distribution you're using on the +# host, the name of the package containing the firmware +# binary and variable store template, as well as the paths +# to the files themselves, will be different. For example: +# +# Fedora +# edk2-aarch64 (pkg) +# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin) +# /usr/share/edk2/aarch64/vars-template-pflash.raw (var) +# +# RHEL +# AAVMF (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) +# +# Debian/Ubuntu +# qemu-efi (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) + +[drive "uefi-binary"] + file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "0" + readonly = "on" + +[drive "uefi-varstore"] + file = "guest_VARS.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "1" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" diff --git a/docs/migration.txt b/docs/migration.txt index 6503c17..1b940a8 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -161,6 +161,11 @@ include/hw/hw.h. === More about versions === +Version numbers are intended for major incompatible changes to the +migration of a device, and using them breaks backwards-migration +compatibility; in general most changes can be made by adding Subsections +(see below) or _TEST macros (see below) which won't break compatibility. + You can see that there are several version fields: - version_id: the maximum version_id supported by VMState for that device. @@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to load state from minimum_version_id_old to minimum_version_id. This function is deprecated and will be removed when no more users are left. +Saving state will always create a section with the 'version_id' value +and thus can't be loaded by any older QEMU. + === Massaging functions === Sometimes, it is not enough to be able to save the state directly @@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation not enabled, the values on that fields are garbage and don't need to be sent. +Using a condition function that checks a 'property' to determine whether +to send a subsection allows backwards migration compatibility when +new subsections are added. + +For example; + a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and + default it to true. + b) Add an entry to the HW_COMPAT_ for the previous version + that sets the property to false. + c) Add a static bool support_foo function that tests the property. + d) Add a subsection with a .needed set to the support_foo function + e) (potentially) Add a pre_load that sets up a default value for 'foo' + to be used if the subsection isn't loaded. + +Now that subsection will not be generated when using an older +machine type and the migration stream will be accepted by older +QEMU versions. pre-load functions can be used to initialise state +on the newer version so that they default to suitable values +when loading streams created by older QEMU versions that do not +generate the subsection. + +In some cases subsections are added for data that had been accidentally +omitted by earlier versions; if the missing data causes the migration +process to succeed but the guest to behave badly then it may be better +to send the subsection and cause the migration to explicitly fail +with the unknown subsection error. If the bad behaviour only happens +with certain data values, making the subsection conditional on +the data value (rather than the machine type) allows migrations to succeed +in most cases. In general the preference is to tie the subsection to +the machine type, and allow reliable migrations, unless the behaviour +from omission of the subsection is really bad. + += Not sending existing elements = + +Sometimes members of the VMState are no longer needed; + removing them will break migration compatibility + making them version dependent and bumping the version will break backwards + migration compatibility. + +The best way is to: + a) Add a new property/compatibility/function in the same way for subsections + above. + b) replace the VMSTATE macro with the _TEST version of the macro, e.g.: + VMSTATE_UINT32(foo, barstruct) + becomes + VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz) + + Sometime in the future when we no longer care about the ancient +versions these can be killed off. + = Return path = In most migration scenarios there is only a single data path that runs @@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests such as this can happen as a page is sent at about the same time the destination accesses it. +=== Postcopy with hugepages === + +Postcopy now works with hugetlbfs backed memory: + a) The linux kernel on the destination must support userfault on hugepages. + b) The huge-page configuration on the source and destination VMs must be + identical; i.e. RAMBlocks on both sides must use the same page size. + c) Note that -mem-path /dev/hugepages will fall back to allocating normal + RAM if it doesn't have enough hugepages, triggering (b) to fail. + Using -mem-prealloc enforces the allocation using hugepages. + d) Care should be taken with the size of hugepage used; postcopy with 2MB + hugepages works well, however 1GB hugepages are likely to be problematic + since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, + and until the full page is transferred the destination thread is blocked. diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg deleted file mode 100644 index e4ddb7d..0000000 --- a/docs/q35-chipset.cfg +++ /dev/null @@ -1,152 +0,0 @@ -################################################################ -# -# qemu -M q35 creates a bare machine with just the very essential -# chipset devices being present: -# -# 00.0 - Host bridge -# 1f.0 - ISA bridge / LPC -# 1f.2 - SATA (AHCI) controller -# 1f.3 - SMBus controller -# -# This config file documents the other devices and how they are -# created. You can simply use "-readconfig $thisfile" to create -# them all. Here is a overview: -# -# 19.0 - Ethernet controller (not created, our e1000 emulation -# doesn't emulate the ich9 device). -# 1a.* - USB Controller #2 (ehci + uhci companions) -# 1b.0 - HD Audio Controller -# 1c.* - PCI Express Ports -# 1d.* - USB Controller #1 (ehci + uhci companions, -# "qemu -M q35 -usb" creates these too) -# 1e.0 - PCI Bridge -# - -[device "ich9-ehci-2"] - driver = "ich9-usb-ehci2" - multifunction = "on" - bus = "pcie.0" - addr = "1a.7" - -[device "ich9-uhci-4"] - driver = "ich9-usb-uhci4" - multifunction = "on" - bus = "pcie.0" - addr = "1a.0" - masterbus = "ich9-ehci-2.0" - firstport = "0" - -[device "ich9-uhci-5"] - driver = "ich9-usb-uhci5" - multifunction = "on" - bus = "pcie.0" - addr = "1a.1" - masterbus = "ich9-ehci-2.0" - firstport = "2" - -[device "ich9-uhci-6"] - driver = "ich9-usb-uhci6" - multifunction = "on" - bus = "pcie.0" - addr = "1a.2" - masterbus = "ich9-ehci-2.0" - firstport = "4" - - -[device "ich9-hda-audio"] - driver = "ich9-intel-hda" - bus = "pcie.0" - addr = "1b.0" - - -[device "ich9-pcie-port-1"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.0" - port = "1" - chassis = "1" - -[device "ich9-pcie-port-2"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.1" - port = "2" - chassis = "2" - -[device "ich9-pcie-port-3"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.2" - port = "3" - chassis = "3" - -[device "ich9-pcie-port-4"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.3" - port = "4" - chassis = "4" - -## -# Example PCIe switch with two downstream ports -# -#[device "pcie-switch-upstream-port-1"] -# driver = "x3130-upstream" -# bus = "ich9-pcie-port-4" -# addr = "00.0" -# -#[device "pcie-switch-downstream-port-1-1"] -# driver = "xio3130-downstream" -# multifunction = "on" -# bus = "pcie-switch-upstream-port-1" -# addr = "00.0" -# port = "1" -# chassis = "5" -# -#[device "pcie-switch-downstream-port-1-2"] -# driver = "xio3130-downstream" -# multifunction = "on" -# bus = "pcie-switch-upstream-port-1" -# addr = "00.1" -# port = "1" -# chassis = "6" - -[device "ich9-ehci-1"] - driver = "ich9-usb-ehci1" - multifunction = "on" - bus = "pcie.0" - addr = "1d.7" - -[device "ich9-uhci-1"] - driver = "ich9-usb-uhci1" - multifunction = "on" - bus = "pcie.0" - addr = "1d.0" - masterbus = "ich9-ehci-1.0" - firstport = "0" - -[device "ich9-uhci-2"] - driver = "ich9-usb-uhci2" - multifunction = "on" - bus = "pcie.0" - addr = "1d.1" - masterbus = "ich9-ehci-1.0" - firstport = "2" - -[device "ich9-uhci-3"] - driver = "ich9-usb-uhci3" - multifunction = "on" - bus = "pcie.0" - addr = "1d.2" - masterbus = "ich9-ehci-1.0" - firstport = "4" - - -[device "ich9-pci-bridge"] - driver = "i82801b11-bridge" - bus = "pcie.0" - addr = "1e.0" diff --git a/docs/q35-emulated.cfg b/docs/q35-emulated.cfg new file mode 100644 index 0000000..c6416d6 --- /dev/null +++ b/docs/q35-emulated.cfg @@ -0,0 +1,288 @@ +# q35 - Emulated guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-emulated.cfg +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of emulated devices that +# closely resembles that of a physical machine, and will be +# accessed through a graphical console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of devices that +# are pretty much guaranteed to be present in every single +# physical machine based on q35, more specifically: +# +# 00:01.0 VGA compatible controller +# 00:19.0 Ethernet controller +# 00:1a.* USB controller (#2) +# 00:1b.0 Audio device +# 00:1c.* PCI bridge (PCI Express Root Ports) +# 00:1d.* USB Controller (#1) +# 00:1e.0 PCI bridge (legacy PCI bridge) +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We add four PCI Express Root Ports, all sharing the same +# slot on the PCI Express Root Bus. These ports support +# hotplug. + +[device "ich9-pcie-port-1"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + +[device "ich9-pcie-port-2"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "ich9-pcie-port-3"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "ich9-pcie-port-4"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + + +# PCI bridge (legacy PCI bridge) +# ========================================================= +# +# This bridge can be used to build an independent topology +# for legacy PCI devices. PCI Express devices should be +# plugged into PCI Express slots instead, so ideally there +# will be no devices connected to this bridge. + +[device "ich9-pci-bridge"] + driver = "i82801b11-bridge" + bus = "pcie.0" + addr = "1e.0" + + +# SATA storage +# ========================================================= +# +# An implicit SATA controller is created automatically for +# every single q35 guest; here we create a disk, backed by +# a qcow2 disk image on the host's filesystem, and attach +# it to that controller so that the guest can use it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "sata-disk"] + driver = "ide-hd" + bus = "ide.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "sata-optical-disk"] + driver = "ide-cd" + bus = "ide.1" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# USB controller (#1) +# ========================================================= +# +# EHCI controller + UHCI companion controllers. + +[device "ich9-ehci-1"] + driver = "ich9-usb-ehci1" + multifunction = "on" + bus = "pcie.0" + addr = "1d.7" + +[device "ich9-uhci-1"] + driver = "ich9-usb-uhci1" + multifunction = "on" + bus = "pcie.0" + addr = "1d.0" + masterbus = "ich9-ehci-1.0" + firstport = "0" + +[device "ich9-uhci-2"] + driver = "ich9-usb-uhci2" + multifunction = "on" + bus = "pcie.0" + addr = "1d.1" + masterbus = "ich9-ehci-1.0" + firstport = "2" + +[device "ich9-uhci-3"] + driver = "ich9-usb-uhci3" + multifunction = "on" + bus = "pcie.0" + addr = "1d.2" + masterbus = "ich9-ehci-1.0" + firstport = "4" + + +# USB controller (#2) +# ========================================================= +# +# EHCI controller + UHCI companion controllers. + +[device "ich9-ehci-2"] + driver = "ich9-usb-ehci2" + multifunction = "on" + bus = "pcie.0" + addr = "1a.7" + +[device "ich9-uhci-4"] + driver = "ich9-usb-uhci4" + multifunction = "on" + bus = "pcie.0" + addr = "1a.0" + masterbus = "ich9-ehci-2.0" + firstport = "0" + +[device "ich9-uhci-5"] + driver = "ich9-usb-uhci5" + multifunction = "on" + bus = "pcie.0" + addr = "1a.1" + masterbus = "ich9-ehci-2.0" + firstport = "2" + +[device "ich9-uhci-6"] + driver = "ich9-usb-uhci6" + multifunction = "on" + bus = "pcie.0" + addr = "1a.2" + masterbus = "ich9-ehci-2.0" + firstport = "4" + + +# Ethernet controller +# ========================================================= +# +# We add a Gigabit Ethernet interface to the guest; on the +# host side, we take advantage of user networking so that +# the QEMU process doesn't require any additional +# privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "e1000" + netdev = "hostnet" + bus = "pcie.0" + addr = "19.0" + + +# VGA compatible controller +# ========================================================= +# +# We use stdvga instead of Cirrus as it supports more video +# modes and is closer to what actual hardware looks like. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. + +[device "video"] + driver = "VGA" + bus = "pcie.0" + addr = "01.0" + + +# Audio device +# ========================================================= +# +# The sound card is a legacy PCI device that is plugged +# directly into the PCI Express Root Bus. + +[device "ich9-hda-audio"] + driver = "ich9-intel-hda" + bus = "pcie.0" + addr = "1b.0" + +[device "ich9-hda-duplex"] + driver = "hda-duplex" + bus = "ich9-hda-audio.0" + cad = "0" diff --git a/docs/q35-virtio-graphical.cfg b/docs/q35-virtio-graphical.cfg new file mode 100644 index 0000000..28bde2f --- /dev/null +++ b/docs/q35-virtio-graphical.cfg @@ -0,0 +1,248 @@ +# q35 - VirtIO guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-virtio-graphical.cfg +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through a graphical console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00:01.0 VGA compatible controller +# 00:1b.0 Audio device +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# 03:00.0 USB controller +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" + + +# USB controller (and input devices) +# ========================================================= +# +# We add a virtualization-friendly USB 3.0 controller and +# a USB tablet so that graphical guests can be controlled +# appropriately. A USB keyboard is not needed, as q35 +# guests get a PS/2 one added automatically. + +[device "usb"] + driver = "nec-usb-xhci" + bus = "pcie.3" + addr = "00.0" + +[device "tablet"] + driver = "usb-tablet" + bus = "usb.0" + + +# VGA compatible controller +# ========================================================= +# +# We plug the QXL video card directly into the PCI Express +# Root Bus as it is a legacy PCI device; this way, we can +# reduce the number of PCI Express controllers in the +# guest. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. + +[device "video"] + driver = "qxl-vga" + bus = "pcie.0" + addr = "01.0" + + +# Audio device +# ========================================================= +# +# Like the video card, the sound card is a legacy PCI +# device and as such can be plugged directly into the PCI +# Express Root Bus. + +[device "sound"] + driver = "ich9-intel-hda" + bus = "pcie.0" + addr = "1b.0" + +[device "duplex"] + driver = "hda-duplex" + bus = "sound.0" + cad = "0" diff --git a/docs/q35-virtio-serial.cfg b/docs/q35-virtio-serial.cfg new file mode 100644 index 0000000..c33c9cc --- /dev/null +++ b/docs/q35-virtio-serial.cfg @@ -0,0 +1,193 @@ +# q35 - VirtIO guest (serial console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-virtio-serial.cfg \ +# -display none -serial mon:stdio +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through the serial console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# +# More information about these devices is available below. +# +# We use '-display none' to prevent QEMU from creating a +# graphical display window, which would serve no use in +# this specific configuration, and '-serial mon:stdio' to +# multiplex the guest's serial console and the QEMU monitor +# to the host's stdio; use 'Ctrl+A h' to learn how to +# switch between the two and more. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" @@ -45,6 +45,12 @@ #include "exec/address-spaces.h" #include "sysemu/xen-mapcache.h" #include "trace-root.h" + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include <fcntl.h> +#include <linux/falloc.h> +#endif + #endif #include "exec/cpu-all.h" #include "qemu/rcu_queue.h" @@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb) return rb->page_size; } +/* Returns the largest size of page in use */ +size_t qemu_ram_pagesize_largest(void) +{ + RAMBlock *block; + size_t largest = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + largest = MAX(largest, qemu_ram_pagesize(block)); + } + + return largest; +} + static int memory_try_enable_merging(void *addr, size_t len) { if (!machine_mem_merge(current_machine)) { @@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) rcu_read_unlock(); return ret; } + +/* + * Unmap pages of memory from start to start+length such that + * they a) read as 0, b) Trigger whatever fault mechanism + * the OS provides for postcopy. + * The pages must be unmapped by the end of the function. + * Returns: 0 on success, none-0 on failure + * + */ +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) +{ + int ret = -1; + + uint8_t *host_startaddr = rb->host + start; + + if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->used_length) { + uint8_t *host_endaddr = host_startaddr + length; + if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned end address: %p", + host_endaddr); + goto err; + } + + errno = ENOTSUP; /* If we are missing MADVISE etc */ + + if (rb->page_size == qemu_host_page_size) { +#if defined(CONFIG_MADVISE) + /* Note: We need the madvise MADV_DONTNEED behaviour of definitely + * freeing the page. + */ + ret = madvise(host_startaddr, length, MADV_DONTNEED); +#endif + } else { + /* Huge page case - unfortunately it can't do DONTNEED, but + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the + * huge page file. + */ +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); +#endif + } + if (ret) { + ret = -errno; + error_report("ram_block_discard_range: Failed to discard range " + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + } + } else { + error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 + "/%zx/" RAM_ADDR_FMT")", + rb->idstr, start, length, rb->used_length); + } + +err: + return ret; +} + #endif @@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; + int ret; blk = blk_by_name(device); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(); - blk_insert_bs(blk, bs); + blk = local_blk = blk_new(0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } } else { goto fail; } @@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) aio_context = blk_get_aio_context(blk); aio_context_acquire(aio_context); + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. + * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoken them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil. + */ qemuio_command(blk, command); aio_context_release(aio_context); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 2369b91..f22a3c3 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -13,7 +13,9 @@ #include "qemu/osdep.h" #include "9p.h" +#include "9p-local.h" #include "9p-xattr.h" +#include "9p-util.h" #include "fsdev/qemu-fsdev.h" /* local_ops */ #include <arpa/inet.h> #include <pwd.h> @@ -43,40 +45,62 @@ #define BTRFS_SUPER_MAGIC 0x9123683E #endif -#define VIRTFS_META_DIR ".virtfs_metadata" +typedef struct { + int mountfd; +} LocalData; -static char *local_mapped_attr_path(FsContext *ctx, const char *path) +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode) { - int dirlen; - const char *name = strrchr(path, '/'); - if (name) { - dirlen = name - path; - ++name; - } else { - name = path; - dirlen = 0; + LocalData *data = fs_ctx->private; + + /* All paths are relative to the path data->mountfd points to */ + while (*path == '/') { + path++; } - return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root, - dirlen, path, VIRTFS_META_DIR, name); + + return relative_openat_nofollow(data->mountfd, path, flags, mode); +} + +int local_opendir_nofollow(FsContext *fs_ctx, const char *path) +{ + return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0); +} + +static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd, + const char *npath) +{ + int serrno = errno; + renameat(odirfd, opath, ndirfd, npath); + errno = serrno; } -static FILE *local_fopen(const char *path, const char *mode) +static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) +{ + int serrno = errno; + unlinkat(dirfd, path, flags); + errno = serrno; +} + +#define VIRTFS_META_DIR ".virtfs_metadata" + +static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { int fd, o_mode = 0; FILE *fp; - int flags = O_NOFOLLOW; + int flags; /* * only supports two modes */ if (mode[0] == 'r') { - flags |= O_RDONLY; + flags = O_RDONLY; } else if (mode[0] == 'w') { - flags |= O_WRONLY | O_TRUNC | O_CREAT; + flags = O_WRONLY | O_TRUNC | O_CREAT; o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; } else { return NULL; } - fd = open(path, flags, o_mode); + fd = openat_file(dirfd, name, flags, o_mode); if (fd == -1) { return NULL; } @@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode) } #define ATTR_MAX 100 -static void local_mapped_file_attr(FsContext *ctx, const char *path, +static void local_mapped_file_attr(int dirfd, const char *name, struct stat *stbuf) { FILE *fp; char buf[ATTR_MAX]; - char *attr_path; + int map_dirfd; - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); - g_free(attr_path); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } + + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); if (!fp) { return; } @@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path, static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { - int err; - char *buffer; - char *path = fs_path->data; + int err = -1; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(fs_ctx, path); - err = lstat(buffer, stbuf); + err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW); if (err) { goto err_out; } @@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) gid_t tmp_gid; mode_t tmp_mode; dev_t tmp_dev; - if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (getxattr(buffer, "user.virtfs.mode", - &tmp_mode, sizeof(mode_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev, + sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - local_mapped_file_attr(fs_ctx, path, stbuf); + local_mapped_file_attr(dirfd, name, stbuf); } err_out: - g_free(buffer); - return err; -} - -static int local_create_mapped_attr_dir(FsContext *ctx, const char *path) -{ - int err; - char *attr_dir; - char *tmp_path = g_strdup(path); - - attr_dir = g_strdup_printf("%s/%s/%s", - ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR); - - err = mkdir(attr_dir, 0700); - if (err < 0 && errno == EEXIST) { - err = 0; - } - g_free(attr_dir); - g_free(tmp_path); + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } -static int local_set_mapped_file_attr(FsContext *ctx, - const char *path, FsCred *credp) +static int local_set_mapped_file_attrat(int dirfd, const char *name, + FsCred *credp) { FILE *fp; - int ret = 0; + int ret; char buf[ATTR_MAX]; - char *attr_path; int uid = -1, gid = -1, mode = -1, rdev = -1; + int map_dirfd; + + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return -1; + } + + fp = local_fopenat(map_dirfd, name, "r"); if (!fp) { - goto create_map_file; + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { if (!strncmp(buf, "virtfs.uid", 10)) { - uid = atoi(buf+11); + uid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.gid", 10)) { - gid = atoi(buf+11); + gid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.mode", 11)) { - mode = atoi(buf+12); + mode = atoi(buf + 12); } else if (!strncmp(buf, "virtfs.rdev", 11)) { - rdev = atoi(buf+12); + rdev = atoi(buf + 12); } memset(buf, 0, ATTR_MAX); } fclose(fp); - goto update_map_file; - -create_map_file: - ret = local_create_mapped_attr_dir(ctx, path); - if (ret < 0) { - goto err_out; - } update_map_file: - fp = local_fopen(attr_path, "w"); + fp = local_fopenat(map_dirfd, name, "w"); + close_preserve_errno(map_dirfd); if (!fp) { - ret = -1; - goto err_out; + return -1; } if (credp->fc_uid != -1) { @@ -230,7 +259,6 @@ update_map_file: rdev = credp->fc_rdev; } - if (uid != -1) { fprintf(fp, "virtfs.uid=%d\n", uid); } @@ -245,39 +273,71 @@ update_map_file: } fclose(fp); -err_out: - g_free(attr_path); + return 0; +} + +static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) +{ + int fd, ret; + + /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). + * Unfortunately, the linux kernel doesn't implement it yet. As an + * alternative, let's open the file and use fchmod() instead. This + * may fail depending on the permissions of the file, but it is the + * best we can do to avoid TOCTTOU. We first try to open read-only + * in case name points to a directory. If that fails, we try write-only + * in case name doesn't point to a directory. + */ + fd = openat_file(dirfd, name, O_RDONLY, 0); + if (fd == -1) { + /* In case the file is writable-only and isn't a directory. */ + if (errno == EACCES) { + fd = openat_file(dirfd, name, O_WRONLY, 0); + } + if (fd == -1 && errno == EISDIR) { + errno = EACCES; + } + } + if (fd == -1) { + return -1; + } + ret = fchmod(fd, mode); + close_preserve_errno(fd); return ret; } -static int local_set_xattr(const char *path, FsCred *credp) +static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) { int err; if (credp->fc_uid != -1) { uint32_t tmp_uid = cpu_to_le32(credp->fc_uid); - err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t), 0); if (err) { return err; } } if (credp->fc_gid != -1) { uint32_t tmp_gid = cpu_to_le32(credp->fc_gid); - err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t), 0); if (err) { return err; } } if (credp->fc_mode != -1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); - err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t), 0); if (err) { return err; } } if (credp->fc_rdev != -1) { uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev); - err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev, + sizeof(dev_t), 0); if (err) { return err; } @@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, - FsCred *credp) +static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd, + const char *name, FsCred *credp) { - char *buffer; - - buffer = rpath(fs_ctx, path); - if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) { + if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - goto err; + return -1; } } - if (chmod(buffer, credp->fc_mode & 07777) < 0) { - goto err; - } - - g_free(buffer); - return 0; -err: - g_free(buffer); - return -1; + return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777); } static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { ssize_t tsize = -1; - char *buffer; - char *path = fs_path->data; if ((fs_ctx->export_flags & V9FS_SM_MAPPED) || (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) { int fd; - buffer = rpath(fs_ctx, path); - fd = open(buffer, O_RDONLY | O_NOFOLLOW); - g_free(buffer); + + fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0); if (fd == -1) { return -1; } do { tsize = read(fd, (void *)buf, bufsz); } while (tsize == -1 && errno == EINTR); - close(fd); + close_preserve_errno(fd); } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - tsize = readlink(buffer, buf, bufsz); - g_free(buffer); + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + tsize = readlinkat(dirfd, name, buf, bufsz); + close_preserve_errno(dirfd); + out: + g_free(name); + g_free(dirpath); } return tsize; } @@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int local_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int fd; - buffer = rpath(ctx, path); - fs->fd = open(buffer, flags | O_NOFOLLOW); - g_free(buffer); + fd = local_open_nofollow(ctx, fs_path->data, flags, 0); + if (fd == -1) { + return -1; + } + fs->fd = fd; return fs->fd; } static int local_opendir(FsContext *ctx, V9fsPath *fs_path, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int dirfd; + DIR *stream; + + dirfd = local_opendir_nofollow(ctx, fs_path->data); + if (dirfd == -1) { + return -1; + } - buffer = rpath(ctx, path); - fs->dir.stream = opendir(buffer); - g_free(buffer); - if (!fs->dir.stream) { + stream = fdopendir(dirfd); + if (!stream) { return -1; } + fs->dir.stream = stream; return 0; } @@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = chmod(buffer, credp->fc_mode); - g_free(buffer); + ret = local_set_mapped_file_attrat(dirfd, name, credp); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + ret = fchmodat_nofollow(dirfd, name, credp->fc_mode); } + close_preserve_errno(dirfd); + +out: + g_free(dirpath); + g_free(name); return ret; } static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); if (err == -1) { goto out; } - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); - if (err == -1) { - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, credp->fc_mode, credp->fc_rdev); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); if (err == -1) { goto out; } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); - if (err == -1) { - goto out; + credp->fc_mode = credp->fc_mode | S_IFDIR; + + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, credp->fc_mode); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mkdirat(dirfd, name, credp->fc_mode); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int flags, FsCred *credp, V9fsFidOpenState *fs) { - char *path; int fd = -1; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; /* * Mark all the open to not follow symlinks */ flags |= O_NOFOLLOW; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set cleint credentials in xattr */ - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + /* Set cleint credentials in xattr */ + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set client credentials in .virtfs_metadata directory files */ - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, credp->fc_mode); + fd = openat_file(dirfd, name, flags, credp->fc_mode); if (fd == -1) { - err = fd; goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } @@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, goto out; err_end: - close(fd); - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, + flags & O_DIRECTORY ? AT_REMOVEDIR : 0); + close_preserve_errno(fd); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, V9fsPath *dir_path, const char *name, FsCred *credp) { int err = -1; - int serrno = 0; - char *newpath; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - newpath = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { int fd; ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); + + fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, + SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } /* Write the oldpath (target) to the file. */ @@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, do { write_size = write(fd, (void *)oldpath, oldpath_size); } while (write_size == -1 && errno == EINTR); + close_preserve_errno(fd); if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; goto err_end; } - close(fd); /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - int fd; - ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - /* Write the oldpath (target) to the file. */ - oldpath_size = strlen(oldpath); - do { - write_size = write(fd, (void *)oldpath, oldpath_size); - } while (write_size == -1 && errno == EINTR); + credp->fc_mode = credp->fc_mode | S_IFLNK; - if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - close(fd); - /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_mapped_file_attr(fs_ctx, newpath, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, newpath); - err = symlink(oldpath, buffer); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = symlinkat(oldpath, dirfd, name); if (err) { goto out; } - err = lchown(buffer, credp->fc_uid, credp->fc_gid); + err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); if (err == -1) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - serrno = errno; goto err_end; - } else + } else { err = 0; + } } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_link(FsContext *ctx, V9fsPath *oldpath, V9fsPath *dirpath, const char *name) { - int ret; - V9fsString newpath; - char *buffer, *buffer1; + char *odirpath = g_path_get_dirname(oldpath->data); + char *oname = g_path_get_basename(oldpath->data); + int ret = -1; + int odirfd, ndirfd; + + odirfd = local_opendir_nofollow(ctx, odirpath); + if (odirfd == -1) { + goto out; + } - v9fs_string_init(&newpath); - v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); + ndirfd = local_opendir_nofollow(ctx, dirpath->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + goto out; + } - buffer = rpath(ctx, oldpath->data); - buffer1 = rpath(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + ret = linkat(odirfd, oname, ndirfd, name, 0); + if (ret < 0) { + goto out_close; + } /* now link the virtfs_metadata files */ - if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) { - /* Link the .virtfs_metadata files. Create the metada directory */ - ret = local_create_mapped_attr_dir(ctx, newpath.data); - if (ret < 0) { - goto err_out; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_link; } - buffer = local_mapped_attr_path(ctx, oldpath->data); - buffer1 = local_mapped_attr_path(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } + + ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + goto err_undo_link; } - } -err_out: - v9fs_string_free(&newpath); - return ret; -} -static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) -{ - char *buffer; - int ret; - char *path = fs_path->data; + ret = 0; + } + goto out_close; - buffer = rpath(ctx, path); - ret = truncate(buffer, size); - g_free(buffer); +err: + ret = -1; +err_undo_link: + unlinkat_preserve_errno(ndirfd, name, 0); +out_close: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); +out: + g_free(oname); + g_free(odirpath); return ret; } -static int local_rename(FsContext *ctx, const char *oldpath, - const char *newpath) +static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { - int err; - char *buffer, *buffer1; + int fd, ret; - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = local_create_mapped_attr_dir(ctx, newpath); - if (err < 0) { - return err; - } - /* rename the .virtfs_metadata files */ - buffer = local_mapped_attr_path(ctx, oldpath); - buffer1 = local_mapped_attr_path(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - if (err < 0 && errno != ENOENT) { - return err; - } + fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0); + if (fd == -1) { + return -1; } - - buffer = rpath(ctx, oldpath); - buffer1 = rpath(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - return err; + ret = ftruncate(fd, size); + close_preserve_errno(fd); + return ret; } static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if ((credp->fc_uid == -1 && credp->fc_gid == -1) || (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = lchown(buffer, credp->fc_uid, credp->fc_gid); - g_free(buffer); + ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); + ret = local_set_mapped_file_attrat(dirfd, name, credp); } + + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return ret; } static int local_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { - char *buffer; - int ret; - char *path = fs_path->data; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd, ret = -1; + + dirfd = local_opendir_nofollow(s, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(s, path); - ret = qemu_utimens(buffer, buf); - g_free(buffer); + ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(name); return ret; } -static int local_remove(FsContext *ctx, const char *path) +static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, + int flags) { - int err; - struct stat stbuf; - char *buffer; + int ret = -1; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(ctx, path); - err = lstat(buffer, &stbuf); - g_free(buffer); - if (err) { - goto err_out; - } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - if (S_ISDIR(stbuf.st_mode)) { - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - path, VIRTFS_META_DIR); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + int map_dirfd; + + if (flags == AT_REMOVEDIR) { + int fd; + + fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH); + if (fd == -1) { + goto err_out; + } + /* + * If directory remove .virtfs_metadata contained in the + * directory + */ + ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); + close_preserve_errno(fd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path) } /* * Now remove the name from parent directory - * .virtfs_metadata directory + * .virtfs_metadata directory. */ - buffer = local_mapped_attr_path(ctx, path); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path) } } - buffer = rpath(ctx, path); - err = remove(buffer); - g_free(buffer); + ret = unlinkat(dirfd, name, flags); +err_out: + return ret; +} + +static int local_remove(FsContext *ctx, const char *path) +{ + struct stat stbuf; + char *dirpath = g_path_get_dirname(path); + char *name = g_path_get_basename(path); + int flags = 0; + int dirfd; + int err = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd) { + goto out; + } + + if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + goto err_out; + } + + if (S_ISDIR(stbuf.st_mode)) { + flags |= AT_REMOVEDIR; + } + + err = local_unlinkat_common(ctx, dirfd, name, flags); err_out: + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } @@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type, static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { - char *buffer; - int ret; - char *path = fs_path->data; + int fd, ret; - buffer = rpath(s, path); - ret = statfs(buffer, stbuf); - g_free(buffer); + fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0); + ret = fstatfs(fd, stbuf); + close_preserve_errno(fd); return ret; } @@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, const char *new_name) { int ret; - V9fsString old_full_name, new_full_name; + int odirfd, ndirfd; - v9fs_string_init(&old_full_name); - v9fs_string_init(&new_full_name); + odirfd = local_opendir_nofollow(ctx, olddir->data); + if (odirfd == -1) { + return -1; + } - v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name); - v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name); + ndirfd = local_opendir_nofollow(ctx, newdir->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + return -1; + } - ret = local_rename(ctx, old_full_name.data, new_full_name.data); - v9fs_string_free(&old_full_name); - v9fs_string_free(&new_full_name); + ret = renameat(odirfd, old_name, ndirfd, new_name); + if (ret < 0) { + goto out; + } + + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_rename; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } + + /* rename the .virtfs_metadata files */ + ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_undo_rename; + } + + ret = 0; + } + goto out; + +err: + ret = -1; +err_undo_rename: + renameat_preserve_errno(ndirfd, new_name, odirfd, old_name); +out: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); return ret; } +static void v9fs_path_init_dirname(V9fsPath *path, const char *str) +{ + path->data = g_path_get_dirname(str); + path->size = strlen(path->data) + 1; +} + +static int local_rename(FsContext *ctx, const char *oldpath, + const char *newpath) +{ + int err; + char *oname = g_path_get_basename(oldpath); + char *nname = g_path_get_basename(newpath); + V9fsPath olddir, newdir; + + v9fs_path_init_dirname(&olddir, oldpath); + v9fs_path_init_dirname(&newdir, newpath); + + err = local_renameat(ctx, &olddir, oname, &newdir, nname); + + v9fs_path_free(&newdir); + v9fs_path_free(&olddir); + g_free(nname); + g_free(oname); + + return err; +} + static int local_unlinkat(FsContext *ctx, V9fsPath *dir, const char *name, int flags) { int ret; - V9fsString fullname; - char *buffer; - - v9fs_string_init(&fullname); + int dirfd; - v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name); - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (flags == AT_REMOVEDIR) { - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - fullname.data, VIRTFS_META_DIR); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } - } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ - buffer = local_mapped_attr_path(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } + dirfd = local_opendir_nofollow(ctx, dir->data); + if (dirfd == -1) { + return -1; } - /* Remove the name finally */ - buffer = rpath(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); -err_out: - v9fs_string_free(&fullname); + ret = local_unlinkat_common(ctx, dirfd, name, flags); + close_preserve_errno(dirfd); return ret; } @@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, static int local_init(FsContext *ctx) { - int err = 0; struct statfs stbuf; + LocalData *data = g_malloc(sizeof(*data)); + + data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); + if (data->mountfd == -1) { + goto err; + } + +#ifdef FS_IOC_GETVERSION + /* + * use ioc_getversion only if the ioctl is definied + */ + if (fstatfs(data->mountfd, &stbuf) < 0) { + close_preserve_errno(data->mountfd); + goto err; + } + switch (stbuf.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + ctx->exops.get_st_gen = local_ioc_getversion; + break; + } +#endif if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; @@ -1185,23 +1277,21 @@ static int local_init(FsContext *ctx) ctx->xops = passthrough_xattr_ops; } ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT; -#ifdef FS_IOC_GETVERSION - /* - * use ioc_getversion only if the iocl is definied - */ - err = statfs(ctx->fs_root, &stbuf); - if (!err) { - switch (stbuf.f_type) { - case EXT2_SUPER_MAGIC: - case BTRFS_SUPER_MAGIC: - case REISERFS_SUPER_MAGIC: - case XFS_SUPER_MAGIC: - ctx->exops.get_st_gen = local_ioc_getversion; - break; - } - } -#endif - return err; + + ctx->private = data; + return 0; + +err: + g_free(data); + return -1; +} + +static void local_cleanup(FsContext *ctx) +{ + LocalData *data = ctx->private; + + close(data->mountfd); + g_free(data); } static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) @@ -1252,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations local_ops = { .parse_opts = local_parse_opts, .init = local_init, + .cleanup = local_cleanup, .lstat = local_lstat, .readlink = local_readlink, .close = local_close, diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h new file mode 100644 index 0000000..32c7274 --- /dev/null +++ b/hw/9pfs/9p-local.h @@ -0,0 +1,20 @@ +/* + * 9p local backend utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_LOCAL_H +#define QEMU_9P_LOCAL_H + +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode); +int local_opendir_nofollow(FsContext *fs_ctx, const char *path); + +#endif diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index ec00318..bbf8906 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -25,13 +25,7 @@ static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size); } static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, @@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size, + flags); } static int mp_pacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_ACCESS); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size); } static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, @@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size, + flags); } static int mp_dacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_DEFAULT); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c new file mode 100644 index 0000000..fdb4d57 --- /dev/null +++ b/hw/9pfs/9p-util.c @@ -0,0 +1,69 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode) +{ + int fd; + + fd = dup(dirfd); + if (fd == -1) { + return -1; + } + + while (*path) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(path[0] != '/'); + + head = g_strdup(path); + c = strchr(path, '/'); + if (c) { + head[c - path] = 0; + next_fd = openat_dir(fd, head); + } else { + next_fd = openat_file(fd, head, flags, mode); + } + g_free(head); + if (next_fd == -1) { + close_preserve_errno(fd); + return -1; + } + close(fd); + fd = next_fd; + + if (!c) { + break; + } + path = c + 1; + } + + return fd; +} + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lgetxattr(proc_path, name, value, size); + g_free(proc_path); + return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h new file mode 100644 index 0000000..091f3ce --- /dev/null +++ b/hw/9pfs/9p-util.h @@ -0,0 +1,54 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_UTIL_H +#define QEMU_9P_UTIL_H + +static inline void close_preserve_errno(int fd) +{ + int serrno = errno; + close(fd); + errno = serrno; +} + +static inline int openat_dir(int dirfd, const char *name) +{ + return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH); +} + +static inline int openat_file(int dirfd, const char *name, int flags, + mode_t mode) +{ + int fd, serrno, ret; + + fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, + mode); + if (fd == -1) { + return -1; + } + + serrno = errno; + /* O_NONBLOCK was only needed to open the file. Let's drop it. */ + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + errno = serrno; + return fd; +} + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode); +ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size); +int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size, int flags); + +#endif diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index f87530c..2c90817 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -20,9 +20,6 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, errno = ENOATTR; return -1; } - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, name, value, size); } static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, @@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, name, value, size, flags); } static int mp_user_removexattr(FsContext *ctx, const char *path, const char *name) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, name); - g_free(buffer); - return ret; + return local_removexattr_nofollow(ctx, path, name); } XattrOperations mapped_user_xattr = { diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index 5d8595e..eec160b 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -15,6 +15,8 @@ #include "9p.h" #include "fsdev/file-op-9p.h" #include "9p-xattr.h" +#include "9p-util.h" +#include "9p-local.h" static XattrOperations *get_xattr_operations(XattrOperations **h, @@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } +static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = llistxattr(proc_path, list, size); + g_free(proc_path); + return ret; +} /* * Get the list and pass to each layer to find out whether @@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, size_t vsize) { ssize_t size = 0; - char *buffer; void *ovalue = value; XattrOperations *xops; char *orig_value, *orig_value_start; ssize_t xattr_len, parsed_len = 0, attr_len; + char *dirpath, *name; + int dirfd; /* Get the actual len */ - buffer = rpath(ctx, path); - xattr_len = llistxattr(buffer, value, 0); + dirpath = g_path_get_dirname(path); + dirfd = local_opendir_nofollow(ctx, dirpath); + g_free(dirpath); + if (dirfd == -1) { + return -1; + } + + name = g_path_get_basename(path); + xattr_len = flistxattrat_nofollow(dirfd, name, value, 0); if (xattr_len <= 0) { - g_free(buffer); + g_free(name); + close_preserve_errno(dirfd); return xattr_len; } /* Now fetch the xattr and find the actual size */ orig_value = g_malloc(xattr_len); - xattr_len = llistxattr(buffer, orig_value, xattr_len); - g_free(buffer); + xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len); + g_free(name); + close_preserve_errno(dirfd); + if (xattr_len < 0) { + return -1; + } /* store the orig pointer */ orig_value_start = orig_value; @@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx, } +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fgetxattrat_nofollow(dirfd, filename, name, value, size); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + return local_getxattr_nofollow(ctx, path, name, value, size); +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lsetxattr(proc_path, name, value, size, flags); + g_free(proc_path); + return ret; +} + +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags) +{ + return local_setxattr_nofollow(ctx, path, name, value, size, flags); +} + +static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lremovexattr(proc_path, name); + g_free(proc_path); + return ret; +} + +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fremovexattrat_nofollow(dirfd, filename, name); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_removexattr(FsContext *ctx, const char *path, const char *name) +{ + return local_removexattr_nofollow(ctx, path, name); +} + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags) +{ + errno = ENOTSUP; + return -1; +} + +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size) +{ + return 0; +} + +int notsup_removexattr(FsContext *ctx, const char *path, const char *name) +{ + errno = ENOTSUP; + return -1; +} + XattrOperations *mapped_xattr_ops[] = { &mapped_user_xattr, &mapped_pacl_xattr, diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h index a853ea6..0d83996 100644 --- a/hw/9pfs/9p-xattr.h +++ b/hw/9pfs/9p-xattr.h @@ -29,6 +29,13 @@ typedef struct xattr_operations const char *path, const char *name); } XattrOperations; +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size); +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags); +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name); extern XattrOperations mapped_user_xattr; extern XattrOperations passthrough_user_xattr; @@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags); int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name); + ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value, size_t size); - -static inline ssize_t pt_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, size_t size) -{ - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; -} - -static inline int pt_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; -} - -static inline int pt_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lremovexattr(path, name); - g_free(buffer); - return ret; -} - -static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static inline int notsup_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - errno = ENOTSUP; - return -1; -} - -static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path, - char *name, void *value, size_t size) -{ - return 0; -} - -static inline int notsup_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - errno = ENOTSUP; - return -1; -} +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags); +int pt_removexattr(FsContext *ctx, const char *path, const char *name); + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags); +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size); +int notsup_removexattr(FsContext *ctx, const char *path, const char *name); #endif diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index da0ae0c..32197e6 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = 9p.o +common-obj-y = 9p.o 9p-util.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index d957d1e..2b0f3e1 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -49,7 +49,6 @@ #define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0014 -#define ACPI_PCIHP_LEGACY_SIZE 0x000f #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 #define PCI_EJ_BASE 0x0008 @@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, s->root= root_bus; s->legacy_piix = !bridges_enabled; - if (s->legacy_piix) { - unsigned *bus_bsel = g_malloc(sizeof *bus_bsel); - - s->io_len = ACPI_PCIHP_LEGACY_SIZE; - - *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT; - object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL, - bus_bsel, NULL); - } - memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s, "acpi-pci-hotplug", s->io_len); memory_region_add_subregion(address_space_io, s->io_base, &s->io); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 6d99fe4..a553a7e 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) { PIIX4PMState *s = opaque; + /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive + * and it's not hot-unpluggable */ qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort); } diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 0c9ca7b..c8a11f2 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "hw/arm/armv7m.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -17,147 +18,260 @@ #include "elf.h" #include "sysemu/qtest.h" #include "qemu/error-report.h" +#include "exec/address-spaces.h" /* Bitbanded IO. Each word corresponds to a single bit. */ /* Get the byte address of the real memory for a bitband access. */ -static inline uint32_t bitband_addr(void * opaque, uint32_t addr) +static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset) { - uint32_t res; - - res = *(uint32_t *)opaque; - res |= (addr & 0x1ffffff) >> 5; - return res; - + return s->base | (offset & 0x1ffffff) >> 5; } -static uint32_t bitband_readb(void *opaque, hwaddr offset) +static MemTxResult bitband_read(void *opaque, hwaddr offset, + uint64_t *data, unsigned size, MemTxAttrs attrs) { - uint8_t v; - cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1); - return (v & (1 << ((offset >> 2) & 7))) != 0; + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1; + *data = bit; + return MEMTX_OK; } -static void bitband_writeb(void *opaque, hwaddr offset, - uint32_t value) +static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) { - uint32_t addr; - uint8_t mask; - uint8_t v; - addr = bitband_addr(opaque, offset); - mask = (1 << ((offset >> 2) & 7)); - cpu_physical_memory_read(addr, &v, 1); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 1); + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = 1 << (bitpos & 7); + if (value & 1) { + buf[bitpos >> 3] |= bit; + } else { + buf[bitpos >> 3] &= ~bit; + } + return address_space_write(s->source_as, addr, attrs, buf, size); } -static uint32_t bitband_readw(void *opaque, hwaddr offset) +static const MemoryRegionOps bitband_ops = { + .read_with_attrs = bitband_read, + .write_with_attrs = bitband_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 1, + .impl.max_access_size = 4, + .valid.min_access_size = 1, + .valid.max_access_size = 4, +}; + +static void bitband_init(Object *obj) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - return (v & mask) != 0; + BitBandState *s = BITBAND(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + object_property_add_link(obj, "source-memory", + TYPE_MEMORY_REGION, + (Object **)&s->source_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init_io(&s->iomem, obj, &bitband_ops, s, + "bitband", 0x02000000); + sysbus_init_mmio(dev, &s->iomem); } -static void bitband_writew(void *opaque, hwaddr offset, - uint32_t value) +static void bitband_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 2); + BitBandState *s = BITBAND(dev); + + if (!s->source_memory) { + error_setg(errp, "source-memory property not set"); + return; + } + + s->source_as = address_space_init_shareable(s->source_memory, + "bitband-source"); } -static uint32_t bitband_readl(void *opaque, hwaddr offset) +/* Board init. */ + +static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = { + 0x20000000, 0x40000000 +}; + +static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { + 0x22000000, 0x42000000 +}; + +static void armv7m_instance_init(Object *obj) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - return (v & mask) != 0; + ARMv7MState *s = ARMV7M(obj); + int i; + + /* Can't init the cpu here, we don't yet know which model to use */ + + object_property_add_link(obj, "memory", + TYPE_MEMORY_REGION, + (Object **)&s->board_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX); + + object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic"); + qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default()); + object_property_add_alias(obj, "num-irq", + OBJECT(&s->nvic), "num-irq", &error_abort); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND); + qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default()); + } } -static void bitband_writel(void *opaque, hwaddr offset, - uint32_t value) +static void armv7m_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 4); -} + ARMv7MState *s = ARMV7M(dev); + SysBusDevice *sbd; + Error *err = NULL; + int i; + char **cpustr; + ObjectClass *oc; + const char *typename; + CPUClass *cc; + + if (!s->board_memory) { + error_setg(errp, "memory property was not set"); + return; + } -static const MemoryRegionOps bitband_ops = { - .old_mmio = { - .read = { bitband_readb, bitband_readw, bitband_readl, }, - .write = { bitband_writeb, bitband_writew, bitband_writel, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; + memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1); -#define TYPE_BITBAND "ARM,bitband-memory" -#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + cpustr = g_strsplit(s->cpu_model, ",", 2); -typedef struct { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ + oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]); + if (!oc) { + error_setg(errp, "Unknown CPU model %s", cpustr[0]); + g_strfreev(cpustr); + return; + } - MemoryRegion iomem; - uint32_t base; -} BitBandState; + cc = CPU_CLASS(oc); + typename = object_class_get_name(oc); + cc->parse_features(typename, cpustr[1], &err); + g_strfreev(cpustr); + if (err) { + error_propagate(errp, err); + return; + } -static void bitband_init(Object *obj) -{ - BitBandState *s = BITBAND(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + s->cpu = ARM_CPU(object_new(typename)); + if (!s->cpu) { + error_setg(errp, "Unknown CPU model %s", s->cpu_model); + return; + } - memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base, - "bitband", 0x02000000); - sysbus_init_mmio(dev, &s->iomem); + object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory", + &error_abort); + object_property_set_bool(OBJECT(s->cpu), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Note that we must realize the NVIC after the CPU */ + object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Alias the NVIC's input and output GPIOs as our own so the board + * code can wire them up. (We do this in realize because the + * NVIC doesn't create the input GPIO array until realize.) + */ + qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL); + qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); + + /* Wire the NVIC up to the CPU */ + sbd = SYS_BUS_DEVICE(&s->nvic); + sysbus_connect_irq(sbd, 0, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); + s->cpu->env.nvic = &s->nvic; + + memory_region_add_subregion(&s->container, 0xe000e000, + sysbus_mmio_get_region(sbd, 0)); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + Object *obj = OBJECT(&s->bitband[i]); + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]); + + object_property_set_int(obj, bitband_input_addr[i], "base", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + object_property_set_link(obj, OBJECT(s->board_memory), + "source-memory", &error_abort); + object_property_set_bool(obj, true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->container, bitband_output_addr[i], + sysbus_mmio_get_region(sbd, 0)); + } } -static void armv7m_bitband_init(void) -{ - DeviceState *dev; +static Property armv7m_properties[] = { + DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model), + DEFINE_PROP_END_OF_LIST(), +}; - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x20000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000); +static void armv7m_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x40000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000); + dc->realize = armv7m_realize; + dc->props = armv7m_properties; } -/* Board init. */ +static const TypeInfo armv7m_info = { + .name = TYPE_ARMV7M, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MState), + .instance_init = armv7m_instance_init, + .class_init = armv7m_class_init, +}; static void armv7m_reset(void *opaque) { @@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque) /* Init CPU and memory for a v7-M based board. mem_size is in bytes. - Returns the NVIC array. */ + Returns the ARMv7M device. */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { - ARMCPU *cpu; - CPUARMState *env; - DeviceState *nvic; - int image_size; - uint64_t entry; - uint64_t lowaddr; - int big_endian; + DeviceState *armv7m; if (cpu_model == NULL) { - cpu_model = "cortex-m3"; - } - cpu = cpu_arm_init(cpu_model); - if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + cpu_model = "cortex-m3"; } - env = &cpu->env; - armv7m_bitband_init(); + armv7m = qdev_create(NULL, "armv7m"); + qdev_prop_set_uint32(armv7m, "num-irq", num_irq); + qdev_prop_set_string(armv7m, "cpu-model", cpu_model); + object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + /* This will exit with an error if the user passed us a bad cpu_model */ + qdev_init_nofail(armv7m); + + armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size); + return armv7m; +} - nvic = qdev_create(NULL, "armv7m_nvic"); - qdev_prop_set_uint32(nvic, "num-irq", num_irq); - env->nvic = nvic; - qdev_init_nofail(nvic); - sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) +{ + int image_size; + uint64_t entry; + uint64_t lowaddr; + int big_endian; #ifdef TARGET_WORDS_BIGENDIAN big_endian = 1; @@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, } } + /* CPU objects (unlike devices) are not automatically reset on system + * reset, so we must always register a handler to do so. Unlike + * A-profile CPUs, we don't need to do anything special in the + * handler to arrange that it starts correctly. + * This is arguably the wrong place to do this, but it matches the + * way A-profile does it. Note that this means that every M profile + * board must call this function! + */ qemu_register_reset(armv7m_reset, cpu); - return nvic; } static Property bitband_properties[] = { @@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = bitband_realize; dc->props = bitband_properties; } @@ -251,6 +371,7 @@ static const TypeInfo bitband_info = { static void armv7m_register_types(void) { type_register_static(&bitband_info); + type_register_static(&armv7m_info); } type_init(armv7m_register_types) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 9ed22d5..369ef1e 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -96,6 +96,11 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL); qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default()); + /* SDHOST */ + object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST); + object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL); + qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default()); + /* DMA Channels */ object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA); object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL); @@ -103,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dma), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + + /* GPIO */ + object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO); + object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL); + qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default()); + + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci", + OBJECT(&s->sdhci.sdbus), &error_abort); + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", + OBJECT(&s->sdhost.sdbus), &error_abort); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -267,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0, qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_ARASANSDIO)); - object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus", - &err); + + /* SDHOST */ + object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err); if (err) { error_propagate(errp, err); return; } + memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_SDIO)); + /* DMA Channels */ object_property_set_bool(OBJECT(&s->dma), true, "realized", &err); if (err) { @@ -292,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) BCM2835_IC_GPU_IRQ, INTERRUPT_DMA0 + n)); } + + /* GPIO */ + object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0)); + + object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus", + &err); + if (err) { + error_propagate(errp, err); + return; + } } static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 23d7928..3cfe332 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -27,17 +27,18 @@ #include "hw/boards.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" +#include "hw/arm/arm.h" static void netduino2_init(MachineState *machine) { DeviceState *dev; dev = qdev_create(NULL, TYPE_STM32F205_SOC); - if (machine->kernel_filename) { - qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename); - } qdev_prop_set_string(dev, "cpu-model", "cortex-m3"); object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, + FLASH_SIZE); } static void netduino2_machine_init(MachineClass *mc) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 38425bd..6e1260d 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj) STM32F205State *s = STM32F205_SOC(obj); int i; + object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M); + qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default()); + object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG); qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default()); @@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj) static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *dev, *nvic; + DeviceState *dev, *armv7m; SysBusDevice *busdev; Error *err = NULL; int i; @@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); - nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, - s->kernel_filename, s->cpu_model); + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 96); + qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model); + object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } /* System configuration controller */ dev = DEVICE(&s->syscfg); @@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, 0x40013800); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71)); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { @@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, usart_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); } /* Timer 2 to 5 */ @@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, timer_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i])); } /* ADC 1 to 3 */ @@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) return; } qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0, - qdev_get_gpio_in(nvic, ADC_IRQ)); + qdev_get_gpio_in(armv7m, ADC_IRQ)); for (i = 0; i < STM_NUM_ADCS; i++) { dev = DEVICE(&(s->adc[i])); @@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, spi_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); } } static Property stm32f205_soc_properties[] = { - DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename), DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/block.c b/hw/block/block.c index 8dc9d84..27878d0 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf) } } -void blkconf_apply_backend_options(BlockConf *conf) +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp) { BlockBackend *blk = conf->blk; BlockdevOnError rerror, werror; + uint64_t perm, shared_perm; bool wce; + int ret; + + perm = BLK_PERM_CONSISTENT_READ; + if (!readonly) { + perm |= BLK_PERM_WRITE; + } + + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD; + if (resizable) { + shared_perm |= BLK_PERM_RESIZE; + } + if (conf->share_rw) { + shared_perm |= BLK_PERM_WRITE; + } + + ret = blk_set_perm(blk, perm, shared_perm, errp); + if (ret < 0) { + return; + } switch (conf->wce) { case ON_OFF_AUTO_ON: wce = true; break; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 17d29e7..a328693 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -186,6 +186,7 @@ typedef enum FDiskFlags { struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; + BlockConf *conf; /* Drive status */ FloppyDriveType drive; /* CMOS drive type */ uint8_t perpendicular; /* 2.88 MB access mode */ @@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv) } } -static void fd_change_cb(void *opaque, bool load) +static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; + Error *local_err = NULL; + + if (!load) { + blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort); + } else { + blkconf_apply_backend_options(drive->conf, + blk_is_read_only(drive->blk), false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } drive->media_changed = 1; drive->media_validated = false; @@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev) FloppyDrive *dev = FLOPPY_DRIVE(qdev); FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); FDrive *drive; + Error *local_err = NULL; int ret; if (dev->unit == -1) { @@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } @@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev) * blkconf_apply_backend_options(). */ dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; - blkconf_apply_backend_options(&dev->conf); + + blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us * for empty drives. */ @@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev) return -1; } + drive->conf = &dev->conf; drive->blk = dev->conf.blk; drive->fdctrl = bus->fdc; diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 2d6eb46..190573c 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp) { Flash *s = M25P80(ss); M25P80Class *mc = M25P80_GET_CLASS(s); + int ret; s->pi = mc->pi; @@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp) s->dirty_page = -1; if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->storage = blk_blockalign(s->blk, s->size); diff --git a/hw/block/nand.c b/hw/block/nand.c index c69e675..0d33ac2 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp) { int pagesize; NANDFlashState *s = NAND(dev); + int ret; + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; s->size = nand_flash_ids[s->chip_id].size << 20; @@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp) error_setg(errp, "Can't use a read-only drive"); return; } + ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } if (blk_getlength(s->blk) >= (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { pagesize = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae91a18..ae303d4 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev) int i; int64_t bs_size; uint8_t *pci_conf; + Error *local_err = NULL; if (!n->conf.blk) { return -1; @@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev) return -1; } blkconf_blocksizes(&n->conf); - blkconf_apply_backend_options(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 8d84227..ddf5492 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd) OneNANDState *s = ONE_NAND(dev); uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7)); void *ram; + Error *local_err = NULL; s->base = (hwaddr)-1; s->rdy = NULL; @@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd) error_report("Can't use a read-only drive"); return -1; } + blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } s->blk_cur = s->blk; } s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 71b98a3..594d4cf 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); @@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) } } - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - /* Default to devices being used at their maximum device width. This was * assumed before the device_width support was added. */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index ef71322..e6c5c6c 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); pfl->chip_len = chip_len; + + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); @@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->rom_mode = 1; sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->wcycle = 0; pfl->cmd = 0; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 843bd2f..98c16a7 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } blkconf_serial(&conf->conf, &conf->serial); - blkconf_apply_backend_options(&conf->conf); + blkconf_apply_backend_options(&conf->conf, + blk_is_read_only(conf->conf.blk), true, + &err); + if (err) { + error_propagate(errp, err); + return; + } s->original_wce = blk_enable_write_cache(conf->conf.blk); blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); if (err) { diff --git a/hw/core/bus.c b/hw/core/bus.c index cf383fc..4651f24 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj) TYPE_HOTPLUG_HANDLER, (Object **)&bus->hotplug_handler, object_property_allow_set_link, - OBJ_PROP_LINK_UNREF_ON_RELEASE, + 0, NULL); object_property_add_bool(obj, "realized", bus_get_realized, bus_set_realized, NULL); diff --git a/hw/core/loader.c b/hw/core/loader.c index 8b980e9..bf17b42 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -435,6 +435,19 @@ int load_elf_as(const char *filename, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) { + return load_elf_ram(filename, translate_fn, translate_opaque, + pentry, lowaddr, highaddr, big_endian, elf_machine, + clear_lsb, data_swab, as, true); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom) +{ int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -473,11 +486,11 @@ int load_elf_as(const char *filename, if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } fail: diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index 3af82af..59ccb00 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -12,6 +12,7 @@ #include "qemu/host-utils.h" #include "sysemu/replay.h" #include "sysemu/qtest.h" +#include "block/aio.h" #define DELTA_ADJUST 1 #define DELTA_NO_ADJUST -1 @@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask) s->policy_mask = policy_mask; return s; } + +void ptimer_free(ptimer_state *s) +{ + qemu_bh_delete(s->bh); + timer_free(s->timer); + g_free(s); +} diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 94f4d8b..c34be1c 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, { BlockBackend *blk; bool blk_created = false; + int ret; blk = blk_by_name(str); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(0, BLK_PERM_ALL); blk_created = true; + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } } } if (!blk) { diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 06ba02e..1e7fb33 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -37,6 +37,7 @@ #include "hw/boards.h" #include "hw/sysbus.h" #include "qapi-event.h" +#include "migration/migration.h" int qdev_hotplug = 0; static bool qdev_hot_added = false; @@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child) void qdev_set_parent_bus(DeviceState *dev, BusState *bus) { + bool replugging = dev->parent_bus != NULL; + + if (replugging) { + /* Keep a reference to the device while it's not plugged into + * any bus, to avoid it potentially evaporating when it is + * dereffed in bus_remove_child(). + */ + object_ref(OBJECT(dev)); + bus_remove_child(dev->parent_bus, dev); + object_unref(OBJECT(dev->parent_bus)); + } dev->parent_bus = bus; object_ref(OBJECT(bus)); bus_add_child(bus, dev); + if (replugging) { + object_unref(OBJECT(dev)); + } } /* Create a new device. This only initializes the device state @@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp) Error *local_err = NULL; bool unattached_parent = false; static int unattached_count; + int ret; if (dev->hotplugged && !dc->hotpluggable) { error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj)); @@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } if (value && !dev->realized) { + ret = check_migratable(obj, &local_err); + if (ret < 0) { + goto fail; + } + if (!obj->parent) { gchar *name = g_strdup_printf("device[%d]", unattached_count++); diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs index a43c7cf..fa0a72e 100644 --- a/hw/gpio/Makefile.objs +++ b/hw/gpio/Makefile.objs @@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o obj-$(CONFIG_OMAP) += omap_gpio.o obj-$(CONFIG_IMX) += imx_gpio.o +obj-$(CONFIG_RASPI) += bcm2835_gpio.o diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c new file mode 100644 index 0000000..acc2e3c --- /dev/null +++ b/hw/gpio/bcm2835_gpio.c @@ -0,0 +1,353 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "hw/gpio/bcm2835_gpio.h" + +#define GPFSEL0 0x00 +#define GPFSEL1 0x04 +#define GPFSEL2 0x08 +#define GPFSEL3 0x0C +#define GPFSEL4 0x10 +#define GPFSEL5 0x14 +#define GPSET0 0x1C +#define GPSET1 0x20 +#define GPCLR0 0x28 +#define GPCLR1 0x2C +#define GPLEV0 0x34 +#define GPLEV1 0x38 +#define GPEDS0 0x40 +#define GPEDS1 0x44 +#define GPREN0 0x4C +#define GPREN1 0x50 +#define GPFEN0 0x58 +#define GPFEN1 0x5C +#define GPHEN0 0x64 +#define GPHEN1 0x68 +#define GPLEN0 0x70 +#define GPLEN1 0x74 +#define GPAREN0 0x7C +#define GPAREN1 0x80 +#define GPAFEN0 0x88 +#define GPAFEN1 0x8C +#define GPPUD 0x94 +#define GPPUDCLK0 0x98 +#define GPPUDCLK1 0x9C + +static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg) +{ + int i; + uint32_t value = 0; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + value |= (s->fsel[index] & 0x7) << (3 * i); + } + } + return value; +} + +static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value) +{ + int i; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + int fsel = (value >> (3 * i)) & 0x7; + s->fsel[index] = fsel; + } + } + + /* SD controller selection (48-53) */ + if (s->sd_fsel != 0 + && (s->fsel[48] == 0) /* SD_CLK_R */ + && (s->fsel[49] == 0) /* SD_CMD_R */ + && (s->fsel[50] == 0) /* SD_DATA0_R */ + && (s->fsel[51] == 0) /* SD_DATA1_R */ + && (s->fsel[52] == 0) /* SD_DATA2_R */ + && (s->fsel[53] == 0) /* SD_DATA3_R */ + ) { + /* SDHCI controller selected */ + sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci); + s->sd_fsel = 0; + } else if (s->sd_fsel != 4 + && (s->fsel[48] == 4) /* SD_CLK_R */ + && (s->fsel[49] == 4) /* SD_CMD_R */ + && (s->fsel[50] == 4) /* SD_DATA0_R */ + && (s->fsel[51] == 4) /* SD_DATA1_R */ + && (s->fsel[52] == 4) /* SD_DATA2_R */ + && (s->fsel[53] == 4) /* SD_DATA3_R */ + ) { + /* SDHost controller selected */ + sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost); + s->sd_fsel = 4; + } +} + +static int gpfsel_is_out(BCM2835GpioState *s, int index) +{ + if (index >= 0 && index < 54) { + return s->fsel[index] == 1; + } + return 0; +} + +static void gpset(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & ~*lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 1); + } + cur <<= 1; + } + + *lev |= val; +} + +static void gpclr(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & *lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 0); + } + cur <<= 1; + } + + *lev &= ~val; +} + +static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + return gpfsel_get(s, offset / 4); + case GPSET0: + case GPSET1: + /* Write Only */ + return 0; + case GPCLR0: + case GPCLR1: + /* Write Only */ + return 0; + case GPLEV0: + return s->lev0; + case GPLEV1: + return s->lev1; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } + + return 0; +} + +static void bcm2835_gpio_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + gpfsel_set(s, offset / 4, value); + break; + case GPSET0: + gpset(s, value, 0, 32, &s->lev0); + break; + case GPSET1: + gpset(s, value, 32, 22, &s->lev1); + break; + case GPCLR0: + gpclr(s, value, 0, 32, &s->lev0); + break; + case GPCLR1: + gpclr(s, value, 32, 22, &s->lev1); + break; + case GPLEV0: + case GPLEV1: + /* Read Only */ + break; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + break; + default: + goto err_out; + } + return; + +err_out: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); +} + +static void bcm2835_gpio_reset(DeviceState *dev) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + + int i; + for (i = 0; i < 6; i++) { + gpfsel_set(s, i, 0); + } + + s->sd_fsel = 0; + + /* SDHCI is selected by default */ + sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci); + + s->lev0 = 0; + s->lev1 = 0; +} + +static const MemoryRegionOps bcm2835_gpio_ops = { + .read = bcm2835_gpio_read, + .write = bcm2835_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_gpio = { + .name = "bcm2835_gpio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54), + VMSTATE_UINT32(lev0, BCM2835GpioState), + VMSTATE_UINT32(lev1, BCM2835GpioState), + VMSTATE_UINT8(sd_fsel, BCM2835GpioState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_gpio_init(Object *obj) +{ + BCM2835GpioState *s = BCM2835_GPIO(obj); + DeviceState *dev = DEVICE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_SD_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, + &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out(dev, s->out, 54); +} + +static void bcm2835_gpio_realize(DeviceState *dev, Error **errp) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhci link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhci = SD_BUS(obj); + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhost link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhost = SD_BUS(obj); +} + +static void bcm2835_gpio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_bcm2835_gpio; + dc->realize = &bcm2835_gpio_realize; + dc->reset = &bcm2835_gpio_reset; +} + +static const TypeInfo bcm2835_gpio_info = { + .name = TYPE_BCM2835_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835GpioState), + .instance_init = bcm2835_gpio_init, + .class_init = bcm2835_gpio_class_init, +}; + +static void bcm2835_gpio_register_types(void) +{ + type_register_static(&bcm2835_gpio_info); +} + +type_init(bcm2835_gpio_register_types) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1c928ab..f44767b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -462,7 +462,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) *bus_bsel = (*bsel_alloc)++; object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, - bus_bsel, NULL); + bus_bsel, &error_abort); } return bsel_alloc; @@ -471,7 +471,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) static void acpi_set_pci_info(void) { PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ - unsigned bsel_alloc = 0; + unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT; if (bus) { /* Scan all PCI buses. Set property to enable acpi based hotplug. */ diff --git a/hw/ide/core.c b/hw/ide/core.c index cfa5de6..db509b3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s) } /* called when the inserted state of the media has changed */ -static void ide_cd_change_cb(void *opaque, bool load) +static void ide_cd_change_cb(void *opaque, bool load, Error **errp) { IDEState *s = opaque; uint64_t nb_sectors; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index dbaa75c..4383cd1 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } @@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD, + &err); + if (err) { + error_report_err(err); + return -1; + } if (ide_init_drive(s, dev->conf.blk, kind, dev->version, dev->serial, dev->model, dev->wwn, diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 8948106..adedd0d 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o -obj-$(CONFIG_STELLARIS) += armv7m_nvic.o +obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o obj-$(CONFIG_GRLIB) += grlib_irqmp.o obj-$(CONFIG_IOAPIC) += ioapic.o diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 16b9b0f..c6493d6 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = { } }; +static int icc_sre_el1_reg_pre_load(void *opaque) +{ + GICv3CPUState *cs = opaque; + + /* + * If the sre_el1 subsection is not transferred this + * means SRE_EL1 is 0x7 (which might not be the same as + * our reset value). + */ + cs->icc_sre_el1 = 0x7; + return 0; +} + +static bool icc_sre_el1_reg_needed(void *opaque) +{ + GICv3CPUState *cs = opaque; + + return cs->icc_sre_el1 != 7; +} + +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = { + .name = "arm_gicv3_cpu/sre_el1", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = icc_sre_el1_reg_pre_load, + .needed = icc_sre_el1_reg_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(icc_sre_el1, GICv3CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_gicv3_cpu = { .name = "arm_gicv3_cpu", .version_id = 1, @@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = { .subsections = (const VMStateDescription * []) { &vmstate_gicv3_cpu_virt, NULL + }, + .subsections = (const VMStateDescription * []) { + &vmstate_gicv3_cpu_sre_el1, + NULL } }; @@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) s->cpu[i].cpu = cpu; s->cpu[i].gic = s; + /* Store GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); /* Pre-construct the GICR_TYPER: * For our implementation: diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index f775aba..0b20856 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -19,6 +19,14 @@ #include "gicv3_internal.h" #include "cpu.h" +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + env->gicv3state = (void *)s; +}; + static GICv3CPUState *icc_cs_from_env(CPUARMState *env) { /* Given the CPU, find the right GICv3CPUState struct. diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index d69dc47..81f0403 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -23,8 +23,10 @@ #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" #include "hw/sysbus.h" +#include "qemu/error-report.h" #include "sysemu/kvm.h" #include "kvm_arm.h" +#include "gicv3_internal.h" #include "vgic_common.h" #include "migration/migration.h" @@ -44,6 +46,32 @@ #define KVM_ARM_GICV3_GET_CLASS(obj) \ OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3) +#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \ + (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ICC_PMR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0) +#define ICC_BPR0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3) +#define ICC_AP0R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n) +#define ICC_AP1R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n) +#define ICC_BPR1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3) +#define ICC_CTLR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5) +#define ICC_IGRPEN0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6) +#define ICC_IGRPEN1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7) + typedef struct KVMARMGICv3Class { ARMGICv3CommonClass parent_class; DeviceRealize parent_realize; @@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) kvm_arm_gic_set_irq(s->num_irq, irq, level); } +#define KVM_VGIC_ATTR(reg, typer) \ + ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg)) + +static inline void kvm_gicd_access(GICv3State *s, int offset, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + KVM_VGIC_ATTR(offset, 0), + val, write); +} + +static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu, + uint64_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) | + (VGIC_LEVEL_INFO_LINE_LEVEL << + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT), + val, write); +} + +/* Loop through each distributor IRQ related register; since bits + * corresponding to SPIs and PPIs are RAZ/WI when affinity routing + * is enabled, we skip those. + */ +#define for_each_dist_irq_reg(_irq, _max, _field_width) \ + for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width)) + +static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + kvm_gicd_access(s, offset, ®, false); + *field = reg; + offset += 4; + field++; + } +} + +static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + reg = *field; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + field++; + } +} + +static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + kvm_gicd_access(s, offset, ®, false); + reg = half_unshuffle32(reg >> 1); + if (irq % 32 != 0) { + reg = (reg << 16); + } + *gic_bmp_ptr32(bmp, irq) |= reg; + offset += 4; + } +} + +static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + reg = *gic_bmp_ptr32(bmp, irq); + if (irq % 32 != 0) { + reg = (reg & 0xffff0000) >> 16; + } else { + reg = reg & 0xffff; + } + reg = half_shuffle32(reg) << 1; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gic_line_level_access(s, irq, 0, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + } +} + +static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gic_line_level_access(s, irq, 0, ®, true); + } +} + +/* Read a bitmap register group from the kernel VGIC. */ +static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gicd_access(s, offset, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + offset += 4; + } +} + +static void kvm_dist_putbmp(GICv3State *s, uint32_t offset, + uint32_t clroffset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + /* If this bitmap is a set/clear register pair, first write to the + * clear-reg to clear all bits before using the set-reg to write + * the 1 bits. + */ + if (clroffset != 0) { + reg = 0; + kvm_gicd_access(s, clroffset, ®, true); + } + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_arm_gicv3_check(GICv3State *s) +{ + uint32_t reg; + uint32_t num_irq; + + /* Sanity checking s->num_irq */ + kvm_gicd_access(s, GICD_TYPER, ®, false); + num_irq = ((reg & 0x1f) + 1) * 32; + + if (num_irq < s->num_irq) { + error_report("Model requests %u IRQs, but kernel supports max %u", + s->num_irq, num_irq); + abort(); + } +} + static void kvm_arm_gicv3_put(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + reg = s->gicd_ctlr; + kvm_gicd_access(s, GICD_CTLR, ®, true); + + if (redist_typer & GICR_TYPER_PLPIS) { + /* Set base addresses before LPIs are enabled by GICR_CTLR write */ + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg64 = c->gicr_propbaser; + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, true); + + reg64 = c->gicr_pendbaser; + if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) { + /* Setting PTZ is advised if LPIs are disabled, to reduce + * GIC initialization time. + */ + reg64 |= GICR_PENDBASER_PTZ; + } + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, true); + } + } + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg = c->gicr_ctlr; + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); + + reg = c->gicr_statusr[GICV3_NS]; + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, true); + + reg = c->gicr_waker; + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, true); + + reg = c->gicr_igroupr0; + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICENABLER0, ncpu, ®, true); + reg = c->gicr_ienabler0; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, true); + + /* Restore config before pending so we treat level/edge correctly */ + reg = half_shuffle32(c->edge_trigger >> 16) << 1; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, true); + + reg = c->level; + kvm_gic_line_level_access(s, 0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICPENDR0, ncpu, ®, true); + reg = c->gicr_ipendr0; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, ®, true); + reg = c->gicr_iactiver0; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, true); + + for (i = 0; i < GIC_INTERNAL; i += 4) { + reg = c->gicr_ipriorityr[i] | + (c->gicr_ipriorityr[i + 1] << 8) | + (c->gicr_ipriorityr[i + 2] << 16) | + (c->gicr_ipriorityr[i + 3] << 24); + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, true); + } + } + + /* Distributor state (shared between all CPUs */ + reg = s->gicd_statusr[GICV3_NS]; + kvm_gicd_access(s, GICD_STATUSR, ®, true); + + /* s->enable bitmap -> GICD_ISENABLERn */ + kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled); + + /* s->group bitmap -> GICD_IGROUPRn */ + kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group); + + /* Restore targets before pending to ensure the pending state is set on + * the appropriate CPU interfaces in the kernel + */ + + /* s->gicd_irouter[irq] -> GICD_IROUTERn + * We can't use kvm_dist_put() here because the registers are 64-bit + */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + reg = (uint32_t)s->gicd_irouter[i]; + kvm_gicd_access(s, offset, ®, true); + + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + reg = (uint32_t)(s->gicd_irouter[i] >> 32); + kvm_gicd_access(s, offset, ®, true); + } + + /* s->trigger bitmap -> GICD_ICFGRn + * (restore configuration registers before pending IRQs so we treat + * level/edge correctly) + */ + kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* s->level bitmap -> line_level */ + kvm_gic_put_line_level_bmp(s, s->level); + + /* s->pending bitmap -> GICD_ISPENDRn */ + kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending); + + /* s->active bitmap -> GICD_ISACTIVERn */ + kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active); + + /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */ + kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* CPU Interface state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], true); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], true); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true); + + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G0][3]; + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G0][2]; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G0][1]; + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G0][0]; + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, true); + } + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G1NS][3]; + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G1NS][2]; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G1NS][1]; + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G1NS][0]; + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, true); + } + } } static void kvm_arm_gicv3_get(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot get kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + kvm_gicd_access(s, GICD_CTLR, ®, false); + s->gicd_ctlr = reg; + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); + c->gicr_ctlr = reg; + + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, false); + c->gicr_statusr[GICV3_NS] = reg; + + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, false); + c->gicr_waker = reg; + + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, false); + c->gicr_igroupr0 = reg; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, false); + c->gicr_ienabler0 = reg; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, false); + c->edge_trigger = half_unshuffle32(reg >> 1) << 16; + kvm_gic_line_level_access(s, 0, ncpu, ®, false); + c->level = reg; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, false); + c->gicr_ipendr0 = reg; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, false); + c->gicr_iactiver0 = reg; + + for (i = 0; i < GIC_INTERNAL; i += 4) { + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, false); + c->gicr_ipriorityr[i] = extract32(reg, 0, 8); + c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8); + c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8); + c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8); + } + } + + if (redist_typer & GICR_TYPER_PLPIS) { + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, false); + c->gicr_propbaser = ((uint64_t)regh << 32) | regl; + + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, false); + c->gicr_pendbaser = ((uint64_t)regh << 32) | regl; + } + } + + /* Distributor state (shared between all CPUs */ + + kvm_gicd_access(s, GICD_STATUSR, ®, false); + s->gicd_statusr[GICV3_NS] = reg; + + /* GICD_IGROUPRn -> s->group bitmap */ + kvm_dist_getbmp(s, GICD_IGROUPR, s->group); + + /* GICD_ISENABLERn -> s->enabled bitmap */ + kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled); + + /* Line level of irq */ + kvm_gic_get_line_level_bmp(s, s->level); + /* GICD_ISPENDRn -> s->pending bitmap */ + kvm_dist_getbmp(s, GICD_ISPENDR, s->pending); + + /* GICD_ISACTIVERn -> s->active bitmap */ + kvm_dist_getbmp(s, GICD_ISACTIVER, s->active); + + /* GICD_ICFGRn -> s->trigger bitmap */ + kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */ + kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* GICD_IROUTERn -> s->gicd_irouter[irq] */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + kvm_gicd_access(s, offset, ®l, false); + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + kvm_gicd_access(s, offset, ®h, false); + s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl; + } + + /***************************************************************** + * CPU Interface(s) State + */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], false); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], false); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false); + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G0][3] = reg64; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G0][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G0][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G0][0] = reg64; + } + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][3] = reg64; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][0] = reg64; + } + } +} + +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu; + GICv3State *s; + GICv3CPUState *c; + + c = (GICv3CPUState *)env->gicv3state; + s = c->gic; + cpu = ARM_CPU(c->cpu); + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), + &c->icc_ctlr_el1[GICV3_NS], false); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + c->icc_pmr_el1 = 0; + c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR; + + c->icc_sre_el1 = 0x7; + memset(c->icc_apr, 0, sizeof(c->icc_apr)); + memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen)); } static void kvm_arm_gicv3_reset(DeviceState *dev) @@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev) DPRINTF("Reset\n"); kgc->parent_reset(dev); + + if (s->migration_blocker) { + DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + return; + } + kvm_arm_gicv3_put(s); } +/* + * CPU interface registers of GIC needs to be reset on CPU reset. + * For the calling arm_gicv3_icc_reset() on CPU reset, we register + * below ARMCPRegInfo. As we reset the whole cpu interface under single + * register reset, we define only one register of CPU interface instead + * of defining all the registers. + */ +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = { + { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4, + /* + * If ARM_CP_NOP is used, resetfn is not called, + * So ARM_CP_NO_RAW is appropriate type. + */ + .type = ARM_CP_NO_RAW, + .access = PL1_RW, + .readfn = arm_cp_read_zero, + .writefn = arm_cp_write_ignore, + /* + * We hang the whole cpu interface reset routine off here + * rather than parcelling it out into one little function + * per register + */ + .resetfn = arm_gicv3_icc_reset, + }, + REGINFO_SENTINEL +}; + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) { GICv3State *s = KVM_ARM_GICV3(dev); @@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); - /* Block migration of a KVM GICv3 device: the API for saving and restoring - * the state in the kernel is not yet finalised in the kernel or - * implemented in QEMU. - */ - error_setg(&s->migration_blocker, "vGICv3 migration is not implemented"); - migrate_add_blocker(s->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); - error_free(s->migration_blocker); - return; + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); + + define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); } /* Try to create the device via the device control API */ @@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) kvm_irqchip_commit_routes(kvm_state); } + + if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_CTLR)) { + error_setg(&s->migration_blocker, "This operating system kernel does " + "not support vGICv3 migration"); + migrate_add_blocker(s->migration_blocker, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_free(s->migration_blocker); + return; + } + } } static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 76097b4..32ffa0b 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -17,8 +17,8 @@ #include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/arm/arm.h" +#include "hw/arm/armv7m_nvic.h" #include "target/arm/cpu.h" -#include "exec/address-spaces.h" #include "qemu/log.h" #include "trace.h" @@ -47,7 +47,6 @@ * "exception" more or less interchangeably. */ #define NVIC_FIRST_IRQ 16 -#define NVIC_MAX_VECTORS 512 #define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ) /* Effective running priority of the CPU when no exception is active @@ -55,116 +54,10 @@ */ #define NVIC_NOEXC_PRIO 0x100 -typedef struct VecInfo { - /* Exception priorities can range from -3 to 255; only the unmodifiable - * priority values for RESET, NMI and HardFault can be negative. - */ - int16_t prio; - uint8_t enabled; - uint8_t pending; - uint8_t active; - uint8_t level; /* exceptions <=15 never set level */ -} VecInfo; - -typedef struct NVICState { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - ARMCPU *cpu; - - VecInfo vectors[NVIC_MAX_VECTORS]; - uint32_t prigroup; - - /* vectpending and exception_prio are both cached state that can - * be recalculated from the vectors[] array and the prigroup field. - */ - unsigned int vectpending; /* highest prio pending enabled exception */ - int exception_prio; /* group prio of the highest prio active exception */ - - struct { - uint32_t control; - uint32_t reload; - int64_t tick; - QEMUTimer *timer; - } systick; - - MemoryRegion sysregmem; - MemoryRegion container; - - uint32_t num_irq; - qemu_irq excpout; - qemu_irq sysresetreq; -} NVICState; - -#define TYPE_NVIC "armv7m_nvic" - -#define NVIC(obj) \ - OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) - static const uint8_t nvic_id[] = { 0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1 }; -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL - -#define SYSTICK_ENABLE (1 << 0) -#define SYSTICK_TICKINT (1 << 1) -#define SYSTICK_CLKSOURCE (1 << 2) -#define SYSTICK_COUNTFLAG (1 << 16) - -int system_clock_scale; - -/* Conversion factor from qemu timer to SysTick frequencies. */ -static inline int64_t systick_scale(NVICState *s) -{ - if (s->systick.control & SYSTICK_CLKSOURCE) - return system_clock_scale; - else - return 1000; -} - -static void systick_reload(NVICState *s, int reset) -{ - /* The Cortex-M3 Devices Generic User Guide says that "When the - * ENABLE bit is set to 1, the counter loads the RELOAD value from the - * SYST RVR register and then counts down". So, we need to check the - * ENABLE bit before reloading the value. - */ - if ((s->systick.control & SYSTICK_ENABLE) == 0) { - return; - } - - if (reset) - s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->systick.tick += (s->systick.reload + 1) * systick_scale(s); - timer_mod(s->systick.timer, s->systick.tick); -} - -static void systick_timer_tick(void * opaque) -{ - NVICState *s = (NVICState *)opaque; - s->systick.control |= SYSTICK_COUNTFLAG; - if (s->systick.control & SYSTICK_TICKINT) { - /* Trigger the interrupt. */ - armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); - } - if (s->systick.reload == 0) { - s->systick.control &= ~SYSTICK_ENABLE; - } else { - systick_reload(s, 0); - } -} - -static void systick_reset(NVICState *s) -{ - s->systick.control = 0; - s->systick.reload = 0; - s->systick.tick = 0; - timer_del(s->systick.timer); -} - static int nvic_pending_prio(NVICState *s) { /* return the priority of the current pending interrupt, @@ -510,30 +403,6 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset) switch (offset) { case 4: /* Interrupt Control Type. */ return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1; - case 0x10: /* SysTick Control and Status. */ - val = s->systick.control; - s->systick.control &= ~SYSTICK_COUNTFLAG; - return val; - case 0x14: /* SysTick Reload Value. */ - return s->systick.reload; - case 0x18: /* SysTick Current Value. */ - { - int64_t t; - if ((s->systick.control & SYSTICK_ENABLE) == 0) - return 0; - t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (t >= s->systick.tick) - return 0; - val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1; - /* The interrupt in triggered when the timer reaches zero. - However the counter is not reloaded until the next clock - tick. This is a hack to return zero during the first tick. */ - if (val > s->systick.reload) - val = 0; - return val; - } - case 0x1c: /* SysTick Calibration Value. */ - return 10000; case 0xd00: /* CPUID Base. */ return cpu->midr; case 0xd04: /* Interrupt Control State. */ @@ -668,40 +537,8 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset) static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value) { ARMCPU *cpu = s->cpu; - uint32_t oldval; + switch (offset) { - case 0x10: /* SysTick Control and Status. */ - oldval = s->systick.control; - s->systick.control &= 0xfffffff8; - s->systick.control |= value & 7; - if ((oldval ^ value) & SYSTICK_ENABLE) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (value & SYSTICK_ENABLE) { - if (s->systick.tick) { - s->systick.tick += now; - timer_mod(s->systick.timer, s->systick.tick); - } else { - systick_reload(s, 1); - } - } else { - timer_del(s->systick.timer); - s->systick.tick -= now; - if (s->systick.tick < 0) - s->systick.tick = 0; - } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - /* This is a hack. Force the timer to be reloaded - when the reference clock is changed. */ - systick_reload(s, 1); - } - break; - case 0x14: /* SysTick Reload Value. */ - s->systick.reload = value; - break; - case 0x18: /* SysTick Current Value. Writes reload the timer. */ - systick_reload(s, 1); - s->systick.control &= ~SYSTICK_COUNTFLAG; - break; case 0xd04: /* Interrupt Control State. */ if (value & (1 << 31)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI); @@ -1000,16 +837,12 @@ static const VMStateDescription vmstate_VecInfo = { static const VMStateDescription vmstate_nvic = { .name = "armv7m_nvic", - .version_id = 3, - .minimum_version_id = 3, + .version_id = 4, + .minimum_version_id = 4, .post_load = &nvic_post_load, .fields = (VMStateField[]) { VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1, vmstate_VecInfo, VecInfo), - VMSTATE_UINT32(systick.control, NVICState), - VMSTATE_UINT32(systick.reload, NVICState), - VMSTATE_INT64(systick.tick, NVICState), - VMSTATE_TIMER_PTR(systick.timer, NVICState), VMSTATE_UINT32(prigroup, NVICState), VMSTATE_END_OF_LIST() } @@ -1047,13 +880,26 @@ static void armv7m_nvic_reset(DeviceState *dev) s->exception_prio = NVIC_NOEXC_PRIO; s->vectpending = 0; +} - systick_reset(s); +static void nvic_systick_trigger(void *opaque, int n, int level) +{ + NVICState *s = opaque; + + if (level) { + /* SysTick just asked us to pend its exception. + * (This is different from an external interrupt line's + * behaviour.) + */ + armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + } } static void armv7m_nvic_realize(DeviceState *dev, Error **errp) { NVICState *s = NVIC(dev); + SysBusDevice *systick_sbd; + Error *err = NULL; s->cpu = ARM_CPU(qemu_get_cpu(0)); assert(s->cpu); @@ -1068,10 +914,19 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) /* include space for internal exception vectors */ s->num_irq += NVIC_FIRST_IRQ; + object_property_set_bool(OBJECT(&s->systick), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + systick_sbd = SYS_BUS_DEVICE(&s->systick); + sysbus_connect_irq(systick_sbd, 0, + qdev_get_gpio_in_named(dev, "systick-trigger", 0)); + /* The NVIC and System Control Space (SCS) starts at 0xe000e000 * and looks like this: * 0x004 - ICTR - * 0x010 - 0x1c - systick + * 0x010 - 0xff - systick * 0x100..0x7ec - NVIC * 0x7f0..0xcff - Reserved * 0xd00..0xd3c - SCS registers @@ -1089,12 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); memory_region_add_subregion(&s->container, 0, &s->sysregmem); + memory_region_add_subregion_overlap(&s->container, 0x10, + sysbus_mmio_get_region(systick_sbd, 0), + 1); - /* Map the whole thing into system memory at the location required - * by the v7M architecture. - */ - memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container); - s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); } static void armv7m_nvic_instance_init(Object *obj) @@ -1109,8 +963,12 @@ static void armv7m_nvic_instance_init(Object *obj) NVICState *nvic = NVIC(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK); + qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default()); + sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); + qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1); } static void armv7m_nvic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index aeb801d..05303a5 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -138,6 +138,7 @@ #define ICC_CTLR_EL1_EOIMODE (1U << 1) #define ICC_CTLR_EL1_PMHE (1U << 6) #define ICC_CTLR_EL1_PRIBITS_SHIFT 8 +#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT) #define ICC_CTLR_EL1_IDBITS_SHIFT 11 #define ICC_CTLR_EL1_SEIS (1U << 14) #define ICC_CTLR_EL1_A3V (1U << 15) @@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) } } +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 095c16a..ffc0747 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id) return -1; } -void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu) +void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); - ICPState *ss = &xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(xi, cs->cpu_index); - assert(cs->cpu_index < xics->nr_servers); - assert(cs == ss->cs); + assert(icp); + assert(cs == icp->cs); - ss->output = NULL; - ss->cs = NULL; + icp->output = NULL; + icp->cs = NULL; } -void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) +void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - ICPState *ss = &xics->ss[cs->cpu_index]; - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); + ICPState *icp = xics_icp_get(xi, cs->cpu_index); + ICPStateClass *icpc; - assert(cs->cpu_index < xics->nr_servers); + assert(icp); - ss->cs = cs; + icp->cs = cs; - if (info->cpu_setup) { - info->cpu_setup(xics, cpu); + icpc = ICP_GET_CLASS(icp); + if (icpc->cpu_setup) { + icpc->cpu_setup(icp, cpu); } switch (PPC_INPUT(env)) { case PPC_FLAGS_INPUT_POWER7: - ss->output = env->irq_inputs[POWER7_INPUT_INT]; + icp->output = env->irq_inputs[POWER7_INPUT_INT]; break; case PPC_FLAGS_INPUT_970: - ss->output = env->irq_inputs[PPC970_INPUT_INT]; + icp->output = env->irq_inputs[PPC970_INPUT_INT]; break; default: @@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) } } -static void xics_common_pic_print_info(InterruptStatsProvider *obj, - Monitor *mon) +void icp_pic_print_info(ICPState *icp, Monitor *mon) { - XICSState *xics = XICS_COMMON(obj); - ICSState *ics; - uint32_t i; - - for (i = 0; i < xics->nr_servers; i++) { - ICPState *icp = &xics->ss[i]; - - if (!icp->output) { - continue; - } - monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", - i, icp->xirr, icp->xirr_owner, - icp->pending_priority, icp->mfrr); - } - - QLIST_FOREACH(ics, &xics->ics, list) { - monitor_printf(mon, "ICS %4x..%4x %p\n", - ics->offset, ics->offset + ics->nr_irqs - 1, ics); - - if (!ics->irqs) { - continue; - } - - for (i = 0; i < ics->nr_irqs; i++) { - ICSIRQState *irq = ics->irqs + i; - - if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { - continue; - } - monitor_printf(mon, " %4x %s %02x %02x\n", - ics->offset + i, - (irq->flags & XICS_FLAGS_IRQ_LSI) ? - "LSI" : "MSI", - irq->priority, irq->status); - } - } -} - -/* - * XICS Common class - parent for emulated XICS and KVM-XICS - */ -static void xics_common_reset(DeviceState *d) -{ - XICSState *xics = XICS_COMMON(d); - ICSState *ics; - int i; - - for (i = 0; i < xics->nr_servers; i++) { - device_reset(DEVICE(&xics->ss[i])); - } - - QLIST_FOREACH(ics, &xics->ics, list) { - device_reset(DEVICE(ics)); - } -} - -static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - int64_t value = xics->nr_irqs; + int cpu_index = icp->cs ? icp->cs->cpu_index : -1; - visit_type_int(v, name, &value, errp); -} - -static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); - Error *error = NULL; - int64_t value; - - visit_type_int(v, name, &value, &error); - if (error) { - error_propagate(errp, error); + if (!icp->output) { return; } - if (xics->nr_irqs) { - error_setg(errp, "Number of interrupts is already set to %u", - xics->nr_irqs); - return; - } - - assert(info->set_nr_irqs); - info->set_nr_irqs(xics, value, errp); -} - -void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, - const char *typename, Error **errp) -{ - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char name[32]; - ICPState *icp = &xics->ss[i]; - - object_initialize(icp, sizeof(*icp), typename); - snprintf(name, sizeof(name), "icp[%d]", i); - object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp); - icp->xics = xics; - } + monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", + cpu_index, icp->xirr, icp->xirr_owner, + icp->pending_priority, icp->mfrr); } -static void xics_prop_get_nr_servers(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) +void ics_pic_print_info(ICSState *ics, Monitor *mon) { - XICSState *xics = XICS_COMMON(obj); - int64_t value = xics->nr_servers; - - visit_type_int(v, name, &value, errp); -} + uint32_t i; -static void xics_prop_set_nr_servers(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics); - Error *error = NULL; - int64_t value; + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); - visit_type_int(v, name, &value, &error); - if (error) { - error_propagate(errp, error); + if (!ics->irqs) { return; } - if (xics->nr_servers) { - error_setg(errp, "Number of servers is already set to %u", - xics->nr_servers); - return; - } - - assert(xsc->set_nr_servers); - xsc->set_nr_servers(xics, value, errp); -} - -static void xics_common_initfn(Object *obj) -{ - XICSState *xics = XICS_COMMON(obj); - QLIST_INIT(&xics->ics); - object_property_add(obj, "nr_irqs", "int", - xics_prop_get_nr_irqs, xics_prop_set_nr_irqs, - NULL, NULL, NULL); - object_property_add(obj, "nr_servers", "int", - xics_prop_get_nr_servers, xics_prop_set_nr_servers, - NULL, NULL, NULL); -} - -static void xics_common_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc); + for (i = 0; i < ics->nr_irqs; i++) { + ICSIRQState *irq = ics->irqs + i; - dc->reset = xics_common_reset; - ic->print_info = xics_common_pic_print_info; + if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { + continue; + } + monitor_printf(mon, " %4x %s %02x %02x\n", + ics->offset + i, + (irq->flags & XICS_FLAGS_IRQ_LSI) ? + "LSI" : "MSI", + irq->priority, irq->status); + } } -static const TypeInfo xics_common_info = { - .name = TYPE_XICS_COMMON, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(XICSState), - .class_size = sizeof(XICSStateClass), - .instance_init = xics_common_initfn, - .class_init = xics_common_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, -}; - /* * ICP: Presentation layer */ @@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = { #define XISR_MASK 0x00ffffff #define CPPR_MASK 0xff000000 -#define XISR(ss) (((ss)->xirr) & XISR_MASK) -#define CPPR(ss) (((ss)->xirr) >> 24) +#define XISR(icp) (((icp)->xirr) & XISR_MASK) +#define CPPR(icp) (((icp)->xirr) >> 24) static void ics_reject(ICSState *ics, uint32_t nr) { @@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr) } } -static void ics_resend(ICSState *ics) +void ics_resend(ICSState *ics) { ICSStateClass *k = ICS_BASE_GET_CLASS(ics); @@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr) } } -static void icp_check_ipi(ICPState *ss) +static void icp_check_ipi(ICPState *icp) { - if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) { + if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) { return; } - trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr); + trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr); - if (XISR(ss) && ss->xirr_owner) { - ics_reject(ss->xirr_owner, XISR(ss)); + if (XISR(icp) && icp->xirr_owner) { + ics_reject(icp->xirr_owner, XISR(icp)); } - ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI; - ss->pending_priority = ss->mfrr; - ss->xirr_owner = NULL; - qemu_irq_raise(ss->output); + icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI; + icp->pending_priority = icp->mfrr; + icp->xirr_owner = NULL; + qemu_irq_raise(icp->output); } -static void icp_resend(ICPState *ss) +void icp_resend(ICPState *icp) { - ICSState *ics; + XICSFabric *xi = icp->xics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); - if (ss->mfrr < CPPR(ss)) { - icp_check_ipi(ss); - } - QLIST_FOREACH(ics, &ss->xics->ics, list) { - ics_resend(ics); + if (icp->mfrr < CPPR(icp)) { + icp_check_ipi(icp); } + + xic->ics_resend(xi); } -void icp_set_cppr(ICPState *ss, uint8_t cppr) +void icp_set_cppr(ICPState *icp, uint8_t cppr) { uint8_t old_cppr; uint32_t old_xisr; - old_cppr = CPPR(ss); - ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24); + old_cppr = CPPR(icp); + icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24); if (cppr < old_cppr) { - if (XISR(ss) && (cppr <= ss->pending_priority)) { - old_xisr = XISR(ss); - ss->xirr &= ~XISR_MASK; /* Clear XISR */ - ss->pending_priority = 0xff; - qemu_irq_lower(ss->output); - if (ss->xirr_owner) { - ics_reject(ss->xirr_owner, old_xisr); - ss->xirr_owner = NULL; + if (XISR(icp) && (cppr <= icp->pending_priority)) { + old_xisr = XISR(icp); + icp->xirr &= ~XISR_MASK; /* Clear XISR */ + icp->pending_priority = 0xff; + qemu_irq_lower(icp->output); + if (icp->xirr_owner) { + ics_reject(icp->xirr_owner, old_xisr); + icp->xirr_owner = NULL; } } } else { - if (!XISR(ss)) { - icp_resend(ss); + if (!XISR(icp)) { + icp_resend(icp); } } } -void icp_set_mfrr(ICPState *ss, uint8_t mfrr) +void icp_set_mfrr(ICPState *icp, uint8_t mfrr) { - ss->mfrr = mfrr; - if (mfrr < CPPR(ss)) { - icp_check_ipi(ss); + icp->mfrr = mfrr; + if (mfrr < CPPR(icp)) { + icp_check_ipi(icp); } } -uint32_t icp_accept(ICPState *ss) +uint32_t icp_accept(ICPState *icp) { - uint32_t xirr = ss->xirr; + uint32_t xirr = icp->xirr; - qemu_irq_lower(ss->output); - ss->xirr = ss->pending_priority << 24; - ss->pending_priority = 0xff; - ss->xirr_owner = NULL; + qemu_irq_lower(icp->output); + icp->xirr = icp->pending_priority << 24; + icp->pending_priority = 0xff; + icp->xirr_owner = NULL; - trace_xics_icp_accept(xirr, ss->xirr); + trace_xics_icp_accept(xirr, icp->xirr); return xirr; } -uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr) +uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr) { if (mfrr) { - *mfrr = ss->mfrr; + *mfrr = icp->mfrr; } - return ss->xirr; + return icp->xirr; } -void icp_eoi(ICPState *ss, uint32_t xirr) +void icp_eoi(ICPState *icp, uint32_t xirr) { + XICSFabric *xi = icp->xics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); ICSState *ics; uint32_t irq; /* Send EOI -> ICS */ - ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); - trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr); + icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); + trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr); irq = xirr & XISR_MASK; - QLIST_FOREACH(ics, &ss->xics->ics, list) { - if (ics_valid_irq(ics, irq)) { - ics_eoi(ics, irq); - } + + ics = xic->ics_get(xi, irq); + if (ics) { + ics_eoi(ics, irq); } - if (!XISR(ss)) { - icp_resend(ss); + if (!XISR(icp)) { + icp_resend(icp); } } static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { - XICSState *xics = ics->xics; - ICPState *ss = xics->ss + server; + ICPState *icp = xics_icp_get(ics->xics, server); trace_xics_icp_irq(server, nr, priority); - if ((priority >= CPPR(ss)) - || (XISR(ss) && (ss->pending_priority <= priority))) { + if ((priority >= CPPR(icp)) + || (XISR(icp) && (icp->pending_priority <= priority))) { ics_reject(ics, nr); } else { - if (XISR(ss) && ss->xirr_owner) { - ics_reject(ss->xirr_owner, XISR(ss)); - ss->xirr_owner = NULL; + if (XISR(icp) && icp->xirr_owner) { + ics_reject(icp->xirr_owner, XISR(icp)); + icp->xirr_owner = NULL; } - ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK); - ss->xirr_owner = ics; - ss->pending_priority = priority; - trace_xics_icp_raise(ss->xirr, ss->pending_priority); - qemu_irq_raise(ss->output); + icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK); + icp->xirr_owner = ics; + icp->pending_priority = priority; + trace_xics_icp_raise(icp->xirr, icp->pending_priority); + qemu_irq_raise(icp->output); } } static void icp_dispatch_pre_save(void *opaque) { - ICPState *ss = opaque; - ICPStateClass *info = ICP_GET_CLASS(ss); + ICPState *icp = opaque; + ICPStateClass *info = ICP_GET_CLASS(icp); if (info->pre_save) { - info->pre_save(ss); + info->pre_save(icp); } } static int icp_dispatch_post_load(void *opaque, int version_id) { - ICPState *ss = opaque; - ICPStateClass *info = ICP_GET_CLASS(ss); + ICPState *icp = opaque; + ICPStateClass *info = ICP_GET_CLASS(icp); if (info->post_load) { - return info->post_load(ss, version_id); + return info->post_load(icp, version_id); } return 0; @@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev) qemu_set_irq(icp->output, 0); } +static void icp_realize(DeviceState *dev, Error **errp) +{ + ICPState *icp = ICP(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xics", &err); + if (!obj) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(err)); + return; + } + + icp->xics = XICS_FABRIC(obj); +} + + static void icp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->reset = icp_reset; dc->vmsd = &vmstate_icp_server; + dc->realize = icp_realize; } static const TypeInfo icp_info = { @@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev) } } -static int ics_simple_post_load(ICSState *ics, int version_id) -{ - int i; - - for (i = 0; i < ics->xics->nr_servers; i++) { - icp_resend(&ics->xics->ss[i]); - } - - return 0; -} - static void ics_simple_dispatch_pre_save(void *opaque) { ICSState *ics = opaque; @@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp) ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs); } +static Property ics_simple_properties[] = { + DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static void ics_simple_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICSStateClass *isc = ICS_BASE_CLASS(klass); - dc->realize = ics_simple_realize; + isc->realize = ics_simple_realize; + dc->props = ics_simple_properties; dc->vmsd = &vmstate_ics_simple; dc->reset = ics_simple_reset; - isc->post_load = ics_simple_post_load; isc->reject = ics_simple_reject; isc->resend = ics_simple_resend; isc->eoi = ics_simple_eoi; @@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = { .instance_init = ics_simple_initfn, }; +static void ics_base_realize(DeviceState *dev, Error **errp) +{ + ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev); + ICSState *ics = ICS_BASE(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xics", &err); + if (!obj) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(err)); + return; + } + ics->xics = XICS_FABRIC(obj); + + + if (icsc->realize) { + icsc->realize(dev, errp); + } +} + +static void ics_base_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ics_base_realize; +} + static const TypeInfo ics_base_info = { .name = TYPE_ICS_BASE, .parent = TYPE_DEVICE, .abstract = true, .instance_size = sizeof(ICSState), + .class_init = ics_base_class_init, .class_size = sizeof(ICSStateClass), }; +static const TypeInfo xics_fabric_info = { + .name = TYPE_XICS_FABRIC, + .parent = TYPE_INTERFACE, + .class_size = sizeof(XICSFabricClass), +}; + /* * Exported functions */ -ICSState *xics_find_source(XICSState *xics, int irq) +qemu_irq xics_get_qirq(XICSFabric *xi, int irq) { - ICSState *ics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); + ICSState *ics = xic->ics_get(xi, irq); - QLIST_FOREACH(ics, &xics->ics, list) { - if (ics_valid_irq(ics, irq)) { - return ics; - } + if (ics) { + return ics->qirqs[irq - ics->offset]; } + return NULL; } -qemu_irq xics_get_qirq(XICSState *xics, int irq) +ICPState *xics_icp_get(XICSFabric *xi, int server) { - ICSState *ics = xics_find_source(xics, irq); + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); - if (ics) { - return ics->qirqs[irq - ics->offset]; - } - - return NULL; + return xic->icp_get(xi, server); } void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) @@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) static void xics_register_types(void) { - type_register_static(&xics_common_info); type_register_static(&ics_simple_info); type_register_static(&ics_base_info); type_register_static(&icp_info); + type_register_static(&xics_fabric_info); } type_init(xics_register_types) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 17694ea..0a3daca 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -40,16 +40,12 @@ #include <sys/ioctl.h> -typedef struct KVMXICSState { - XICSState parent_obj; - - int kernel_xics_fd; -} KVMXICSState; +static int kernel_xics_fd = -1; /* * ICP-KVM */ -static void icp_get_kvm_state(ICPState *ss) +static void icp_get_kvm_state(ICPState *icp) { uint64_t state; struct kvm_one_reg reg = { @@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss) int ret; /* ICP for this CPU thread is not in use, exiting */ - if (!ss->cs) { + if (!icp->cs) { return; } - ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, ®); + ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, ®); if (ret != 0) { error_report("Unable to retrieve KVM interrupt controller state" - " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno)); + " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno)); exit(1); } - ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; - ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) + icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; + icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK; - ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) + icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK; } -static int icp_set_kvm_state(ICPState *ss, int version_id) +static int icp_set_kvm_state(ICPState *icp, int version_id) { uint64_t state; struct kvm_one_reg reg = { @@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id) int ret; /* ICP for this CPU thread is not in use, exiting */ - if (!ss->cs) { + if (!icp->cs) { return 0; } - state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) - | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) - | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); + state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) + | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) + | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); - ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, ®); + ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, ®); if (ret != 0) { error_report("Unable to restore KVM interrupt controller state (0x%" - PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs), + PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs), strerror(errno)); return ret; } @@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev) icp_set_kvm_state(icp, 1); } +static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + int ret; + + if (kernel_xics_fd == -1) { + abort(); + } + + /* + * If we are reusing a parked vCPU fd corresponding to the CPU + * which was hot-removed earlier we don't have to renable + * KVM_CAP_IRQ_XICS capability again. + */ + if (icp->cap_irq_xics_enabled) { + return; + } + + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, + kvm_arch_vcpu_id(cs)); + if (ret < 0) { + error_report("Unable to connect CPU%ld to kernel XICS: %s", + kvm_arch_vcpu_id(cs), strerror(errno)); + exit(1); + } + icp->cap_irq_xics_enabled = true; +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data) dc->reset = icp_kvm_reset; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; + icpc->cpu_setup = icp_kvm_cpu_setup; } static const TypeInfo icp_kvm_info = { @@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = { */ static void ics_get_kvm_state(ICSState *ics) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics); uint64_t state; struct kvm_device_attr attr = { .flags = 0, @@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics) attr.attr = i + ics->offset; - ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); + ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); if (ret != 0) { error_report("Unable to retrieve KVM interrupt controller state" " for IRQ %d: %s", i + ics->offset, strerror(errno)); @@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics) static int ics_set_kvm_state(ICSState *ics, int version_id) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics); uint64_t state; struct kvm_device_attr attr = { .flags = 0, @@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) } } - ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); + ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); if (ret != 0) { error_report("Unable to restore KVM interrupt controller state" " for IRQs %d: %s", i + ics->offset, strerror(errno)); @@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); ICSStateClass *icsc = ICS_BASE_CLASS(klass); - dc->realize = ics_kvm_realize; + icsc->realize = ics_kvm_realize; dc->reset = ics_kvm_reset; icsc->pre_save = ics_get_kvm_state; icsc->post_load = ics_set_kvm_state; @@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = { /* * XICS-KVM */ -static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu) -{ - CPUState *cs; - ICPState *ss; - KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics); - int ret; - - cs = CPU(cpu); - ss = &xics->ss[cs->cpu_index]; - - assert(cs->cpu_index < xics->nr_servers); - if (xicskvm->kernel_xics_fd == -1) { - abort(); - } - - /* - * If we are reusing a parked vCPU fd corresponding to the CPU - * which was hot-removed earlier we don't have to renable - * KVM_CAP_IRQ_XICS capability again. - */ - if (ss->cap_irq_xics_enabled) { - return; - } - - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd, - kvm_arch_vcpu_id(cs)); - if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); - exit(1); - } - ss->cap_irq_xics_enabled = true; -} - -static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, - Error **errp) -{ - ICSState *ics = QLIST_FIRST(&xics->ics); - - /* This needs to be deprecated ... */ - xics->nr_irqs = nr_irqs; - if (ics) { - ics->nr_irqs = nr_irqs; - } -} - -static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers, - Error **errp) -{ - xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp); -} static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, @@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, __func__); } -static void xics_kvm_realize(DeviceState *dev, Error **errp) +int xics_kvm_init(sPAPRMachineState *spapr, Error **errp) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev); - XICSState *xics = XICS_COMMON(dev); - ICSState *ics; - int i, rc; - Error *error = NULL; + int rc; struct kvm_create_device xics_create_device = { .type = KVM_DEV_TYPE_XICS, .flags = 0, @@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) goto fail; } - xicskvm->kernel_xics_fd = xics_create_device.fd; - - QLIST_FOREACH(ics, &xics->ics, list) { - object_property_set_bool(OBJECT(ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - goto fail; - } - } - - assert(xics->nr_servers); - for (i = 0; i < xics->nr_servers; i++) { - object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", - &error); - if (error) { - error_propagate(errp, error); - goto fail; - } - } + kernel_xics_fd = xics_create_device.fd; kvm_kernel_irqchip = true; kvm_msi_via_irqfd_allowed = true; kvm_gsi_direct_mapping = true; - return; + return rc; fail: kvmppc_define_rtas_kernel_token(0, "ibm,set-xive"); kvmppc_define_rtas_kernel_token(0, "ibm,get-xive"); kvmppc_define_rtas_kernel_token(0, "ibm,int-on"); kvmppc_define_rtas_kernel_token(0, "ibm,int-off"); + return -1; } -static void xics_kvm_initfn(Object *obj) -{ - XICSState *xics = XICS_COMMON(obj); - ICSState *ics; - - ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM)); - object_property_add_child(obj, "ics", OBJECT(ics), NULL); - ics->xics = xics; - QLIST_INSERT_HEAD(&xics->ics, ics, list); -} - -static void xics_kvm_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - XICSStateClass *xsc = XICS_COMMON_CLASS(oc); - - dc->realize = xics_kvm_realize; - xsc->cpu_setup = xics_kvm_cpu_setup; - xsc->set_nr_irqs = xics_kvm_set_nr_irqs; - xsc->set_nr_servers = xics_kvm_set_nr_servers; -} - -static const TypeInfo xics_spapr_kvm_info = { - .name = TYPE_XICS_SPAPR_KVM, - .parent = TYPE_XICS_COMMON, - .instance_size = sizeof(KVMXICSState), - .class_init = xics_kvm_class_init, - .instance_init = xics_kvm_initfn, -}; - static void xics_kvm_register_types(void) { - type_register_static(&xics_spapr_kvm_info); type_register_static(&ics_kvm_info); type_register_static(&icp_kvm_info); } diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 2e3f1c5..84d24b2 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); target_ulong cppr = args[0]; icp_set_cppr(icp, cppr); @@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, { target_ulong server = xics_get_cpu_index_by_dt_id(args[0]); target_ulong mfrr = args[1]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server); - if (server >= spapr->xics->nr_servers) { + if (!icp) { return H_PARAMETER; } - icp_set_mfrr(spapr->xics->ss + server, mfrr); + icp_set_mfrr(icp, mfrr); return H_SUCCESS; } @@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t xirr = icp_accept(icp); args[0] = xirr; @@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t xirr = icp_accept(icp); args[0] = xirr; @@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); target_ulong xirr = args[0]; icp_eoi(icp, xirr); @@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t mfrr; uint32_t xirr = icp_ipoll(icp, &mfrr); @@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno, server, priority; if ((nargs != 3) || (nret != 1)) { @@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1)); priority = rtas_ld(args, 2); - if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers) + if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server) || (priority > 0xff)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; @@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 3)) { @@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { @@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { @@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, - Error **errp) +int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); - - /* This needs to be deprecated ... */ - xics->nr_irqs = nr_irqs; - if (ics) { - ics->nr_irqs = nr_irqs; - } -} - -static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, - Error **errp) -{ - xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp); -} - -static void xics_spapr_realize(DeviceState *dev, Error **errp) -{ - XICSState *xics = XICS_SPAPR(dev); - ICSState *ics; - Error *error = NULL; - int i; - - if (!xics->nr_servers) { - error_setg(errp, "Number of servers needs to be greater 0"); - return; - } - /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive); @@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - - QLIST_FOREACH(ics, &xics->ics, list) { - object_property_set_bool(OBJECT(ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - return; - } - } - - for (i = 0; i < xics->nr_servers; i++) { - object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", - &error); - if (error) { - error_propagate(errp, error); - return; - } - } -} - -static void xics_spapr_initfn(Object *obj) -{ - XICSState *xics = XICS_SPAPR(obj); - ICSState *ics; - - ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE)); - object_property_add_child(obj, "ics", OBJECT(ics), NULL); - ics->xics = xics; - QLIST_INSERT_HEAD(&xics->ics, ics, list); -} - -static void xics_spapr_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - XICSStateClass *xsc = XICS_SPAPR_CLASS(oc); - - dc->realize = xics_spapr_realize; - xsc->set_nr_irqs = xics_spapr_set_nr_irqs; - xsc->set_nr_servers = xics_spapr_set_nr_servers; + return 0; } -static const TypeInfo xics_spapr_info = { - .name = TYPE_XICS_SPAPR, - .parent = TYPE_XICS_COMMON, - .instance_size = sizeof(XICSState), - .class_size = sizeof(XICSStateClass), - .class_init = xics_spapr_class_init, - .instance_init = xics_spapr_initfn, -}; - #define ICS_IRQ_FREE(ics, srcno) \ (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) @@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum) return -1; } -int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) +int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); int irq; if (!ics) { @@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) * Allocate block of consecutive IRQs, and return the number of the first IRQ in * the block. If align==true, aligns the first IRQ number to num. */ -int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align, - Error **errp) +int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, + bool align, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); int i, first = -1; if (!ics) { @@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num) } } -void xics_spapr_free(XICSState *xics, int irq, int num) +void spapr_ics_free(ICSState *ics, int irq, int num) { - ICSState *ics = xics_find_source(xics, irq); - - if (ics) { + if (ics_valid_irq(ics, irq)) { trace_xics_ics_free(0, irq, num); ics_free(ics, irq - ics->offset, num); } } -void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) +void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle) { uint32_t interrupt_server_ranges_prop[] = { - 0, cpu_to_be32(xics->nr_servers), + 0, cpu_to_be32(nr_servers), }; int node; @@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle)); _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle)); } - -static void xics_spapr_register_types(void) -{ - type_register_static(&xics_spapr_info); -} - -type_init(xics_spapr_register_types) diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 65ba188..aa5d2c1 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) { sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev); + int ret; if (nvram->blk) { nvram->size = blk_getlength(nvram->blk); + + ret = blk_set_perm(nvram->blk, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } } else { nvram->size = DEFAULT_NVRAM_SIZE; } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index a563555..273f1e4 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; +static void pci_for_each_device_under_bus_reverse(PCIBus *bus, + void (*fn)(PCIBus *b, + PCIDevice *d, + void *opaque), + void *opaque) +{ + PCIDevice *d; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn]; + if (d) { + fn(bus, d, opaque); + } + } +} + +void pci_for_each_device_reverse(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), + void *opaque) +{ + bus = pci_find_bus_nr(bus, bus_num); + + if (bus) { + pci_for_each_device_under_bus_reverse(bus, fn, opaque); + } +} + static void pci_for_each_device_under_bus(PCIBus *bus, void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 87d8366..81c6c1c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -63,6 +63,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "hw/nmi.h" +#include "hw/intc/intc.h" #include "hw/compat.h" #include "qemu/cutils.h" @@ -95,37 +96,68 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -static XICSState *try_create_xics(const char *type, int nr_servers, - int nr_irqs, Error **errp) +static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics, + const char *type_icp, int nr_servers, + int nr_irqs, Error **errp) { - Error *err = NULL; - DeviceState *dev; + XICSFabric *xi = XICS_FABRIC(spapr); + Error *err = NULL, *local_err = NULL; + ICSState *ics = NULL; + int i; - dev = qdev_create(NULL, type); - qdev_prop_set_uint32(dev, "nr_servers", nr_servers); - qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs); - object_property_set_bool(OBJECT(dev), true, "realized", &err); + ics = ICS_SIMPLE(object_new(type_ics)); + qdev_set_parent_bus(DEVICE(ics), sysbus_get_default()); + object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL); + object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err); + object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL); + object_property_set_bool(OBJECT(ics), true, "realized", &local_err); + error_propagate(&err, local_err); if (err) { - error_propagate(errp, err); - object_unparent(OBJECT(dev)); - return NULL; + goto error; } - return XICS_COMMON(dev); + + spapr->icps = g_malloc0(nr_servers * sizeof(ICPState)); + spapr->nr_servers = nr_servers; + + for (i = 0; i < nr_servers; i++) { + ICPState *icp = &spapr->icps[i]; + + object_initialize(icp, sizeof(*icp), type_icp); + qdev_set_parent_bus(DEVICE(icp), sysbus_get_default()); + object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL); + object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL); + object_property_set_bool(OBJECT(icp), true, "realized", &err); + if (err) { + goto error; + } + object_unref(OBJECT(icp)); + } + + spapr->ics = ics; + return 0; + +error: + error_propagate(errp, err); + if (ics) { + object_unparent(OBJECT(ics)); + } + return -1; } -static XICSState *xics_system_init(MachineState *machine, - int nr_servers, int nr_irqs, Error **errp) +static int xics_system_init(MachineState *machine, + int nr_servers, int nr_irqs, Error **errp) { - XICSState *xics = NULL; + int rc = -1; if (kvm_enabled()) { Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine)) { - xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, - &err); + if (machine_kernel_irqchip_allowed(machine) && + !xics_kvm_init(SPAPR_MACHINE(machine), errp)) { + rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM, + TYPE_KVM_ICP, nr_servers, nr_irqs, &err); } - if (machine_kernel_irqchip_required(machine) && !xics) { + if (machine_kernel_irqchip_required(machine) && rc < 0) { error_reportf_err(err, "kernel_irqchip requested but unavailable: "); } else { @@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine, } } - if (!xics) { - xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp); + if (rc < 0) { + xics_spapr_init(SPAPR_MACHINE(machine), errp); + rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE, + TYPE_ICP, nr_servers, nr_irqs, errp); } - return xics; + return rc; } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP); + spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -1053,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr) spapr->htab_fd = -1; } +static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + + return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; +} + +static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + /* + * HTAB is controlled by KVM. Fetch into temporary buffer + */ + ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64); + kvmppc_read_hptes(hptes, ptex, n); + return hptes; + } + + /* + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. + */ + return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset); +} + +static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + g_free((void *)hptes); + } + + /* Nothing to do for qemu managed HPT */ +} + +static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte0, uint64_t pte1) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + kvmppc_write_hpte(ptex, pte0, pte1); + } else { + stq_p(spapr->htab + offset, pte0); + stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + } +} + static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) { int shift; @@ -1252,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id) sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; int err = 0; + if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) { + int i; + for (i = 0; i < spapr->nr_servers; i++) { + icp_resend(&spapr->icps[i]); + } + } + /* In earlier versions, there was no separate qdev for the PAPR * RTC, so the RTC offset was stored directly in sPAPREnvironment. * So when migrating from those versions, poke the incoming offset @@ -1902,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine) load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; /* Set up Interrupt Controller before we create the VCPUs */ - spapr->xics = xics_system_init(machine, - DIV_ROUND_UP(max_cpus * smt, smp_threads), - XICS_IRQS_SPAPR, &error_fatal); + xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads), + XICS_IRQS_SPAPR, &error_fatal); /* Set up containers for ibm,client-set-architecture negotiated options */ spapr->ov5 = spapr_ovec_new(); @@ -2872,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; } +static ICSState *spapr_ics_get(XICSFabric *dev, int irq) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(dev); + + return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL; +} + +static void spapr_ics_resend(XICSFabric *dev) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(dev); + + ics_resend(spapr->ics); +} + +static ICPState *spapr_icp_get(XICSFabric *xi, int server) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); + + return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL; +} + +static void spapr_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + int i; + + for (i = 0; i < spapr->nr_servers; i++) { + icp_pic_print_info(&spapr->icps[i], mon); + } + + ics_pic_print_info(spapr->ics, mon); +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2880,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) NMIClass *nc = NMI_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc); + XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant)"; @@ -2891,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = 255; + mc->max_cpus = 1024; mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * M_BYTE; @@ -2913,6 +3045,14 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; vhc->hypercall = emulate_spapr_hypercall; + vhc->hpt_mask = spapr_hpt_mask; + vhc->map_hptes = spapr_map_hptes; + vhc->unmap_hptes = spapr_unmap_hptes; + vhc->store_hpte = spapr_store_hpte; + xic->ics_get = spapr_ics_get; + xic->ics_resend = spapr_ics_resend; + xic->icp_get = spapr_icp_get; + ispc->print_info = spapr_pic_print_info; } static const TypeInfo spapr_machine_info = { @@ -2929,6 +3069,8 @@ static const TypeInfo spapr_machine_info = { { TYPE_NMI }, { TYPE_HOTPLUG_HANDLER }, { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_XICS_FABRIC }, + { TYPE_INTERRUPT_STATS_PROVIDER }, { } }, }; diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 55cd045..90d682f 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -13,10 +13,12 @@ #include "hw/boards.h" #include "qapi/error.h" #include "sysemu/cpus.h" +#include "sysemu/kvm.h" #include "target/ppc/kvm_ppc.h" #include "hw/ppc/ppc.h" #include "target/ppc/mmu-hash64.h" #include "sysemu/numa.h" +#include "qemu/error-report.h" static void spapr_cpu_reset(void *opaque) { @@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque) env->spr[SPR_HIOR] = 0; - ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, - &error_fatal); + /* + * This is a hack for the benefit of KVM PR - it abuses the SDR1 + * slot in kvm_sregs to communicate the userspace address of the + * HPT + */ + if (kvm_enabled()) { + env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab + | (spapr->htab_shift - 18); + if (kvmppc_put_books_sregs(cpu) < 0) { + error_report("Unable to update SDR1 in KVM"); + exit(1); + } + } } static void spapr_cpu_destroy(PowerPCCPU *cpu) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - xics_cpu_destroy(spapr->xics, cpu); + xics_cpu_destroy(XICS_FABRIC(spapr), cpu); qemu_unregister_reset(spapr_cpu_reset, cpu); } @@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); /* Enable PAPR mode in TCG or KVM */ - cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - cpu_ppc_set_papr(cpu); + cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); if (cpu->max_compat) { Error *local_err = NULL; @@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, cs->numa_node = i; } - xics_cpu_setup(spapr->xics, cpu); + xics_cpu_setup(XICS_FABRIC(spapr), cpu); qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f85a9c3..24a5758 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW))); } @@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG))); } @@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, spapr_event_sources_get_source(spapr->event_sources, i); g_assert(source->enabled); - qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq)); + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq)); } } @@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr) spapr->event_sources = spapr_event_sources_new(); spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, - xics_spapr_alloc(spapr->xics, 0, false, + spapr_ics_alloc(spapr->ics, 0, false, &error_fatal)); /* NOTE: if machine supports modern/dedicated hotplug event source, @@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr) */ if (spapr->use_hotplug_event_source) { spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, - xics_spapr_alloc(spapr->xics, 0, false, + spapr_ics_alloc(spapr->ics, 0, false, &error_fatal)); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 42d20e0..f05a90e 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr) return cpu->env.spr_cb[spr].name != NULL; } -static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index) +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) { /* - * hash value/pteg group index is normalized by htab_mask + * hash value/pteg group index is normalized by HPT mask */ - if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) { + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { return false; } return true; @@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr) static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { - CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong pteh = args[2]; target_ulong ptel = args[3]; unsigned apshift; target_ulong raddr; - target_ulong index; - uint64_t token; + target_ulong slot; + const ppc_hash_pte64_t *hptes; apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); if (!apshift) { @@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, pteh &= ~0x60ULL; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } - index = 0; + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + if (likely((flags & H_EXACT) == 0)) { - pte_index &= ~7ULL; - token = ppc_hash64_start_access(cpu, pte_index); - for (; index < 8; index++) { - if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { break; } } - ppc_hash64_stop_access(cpu, token); - if (index == 8) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { return H_PTEG_FULL; } } else { - token = ppc_hash64_start_access(cpu, pte_index); - if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) { - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); return H_PTEG_FULL; } - ppc_hash64_stop_access(cpu, token); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); } - ppc_hash64_store_hpte(cpu, pte_index + index, - pteh | HPTE64_V_HPTE_DIRTY, ptel); + ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); - args[0] = pte_index + index; + args[0] = ptex + slot; return H_SUCCESS; } @@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex, target_ulong flags, target_ulong *vp, target_ulong *rp) { - CPUPPCState *env = &cpu->env; - uint64_t token; + const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_pte_index(env, ptex)) { + if (!valid_ptex(cpu, ptex)) { return REMOVE_PARM; } - token = ppc_hash64_start_access(cpu, ptex); - v = ppc_hash64_load_hpte0(cpu, token, 0); - r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || @@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, { CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong avpn = args[2]; RemoveResult ret; - ret = remove_hpte(cpu, pte_index, avpn, flags, + ret = remove_hpte(cpu, ptex, avpn, flags, &args[0], &args[1]); switch (ret) { @@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, { CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong avpn = args[2]; - uint64_t token; + const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } - token = ppc_hash64_start_access(cpu, pte_index); - v = ppc_hash64_load_hpte0(cpu, token, 0); - r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { @@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, r |= (flags << 55) & HPTE64_R_PP0; r |= (flags << 48) & HPTE64_R_KEY_HI; r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); - ppc_hash64_store_hpte(cpu, pte_index, + ppc_hash64_store_hpte(cpu, ptex, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); /* Flush the tlb */ check_tlb_flush(env, true); /* Don't need a memory barrier, due to qemu's global lock */ - ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r); + ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); return H_SUCCESS; } static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { - CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; uint8_t *hpte; int i, ridx, n_entries = 1; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } if (flags & H_READ_4) { /* Clear the two low order bits */ - pte_index &= ~(3ULL); + ptex &= ~(3ULL); n_entries = 4; } - hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64); for (i = 0, ridx = 0; i < n_entries; i++) { args[ridx++] = ldq_p(hpte); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index fd6fc1d..2a3499e 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -43,6 +43,7 @@ #include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" +#include "hw/pci/pci_ids.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" #include "sysemu/kvm.h" @@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - xics_spapr_free(spapr->xics, msi->first_irq, msi->num); + spapr_ics_free(spapr->ics, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); } @@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = xics_spapr_alloc_block(spapr->xics, req_num, false, + irq = spapr_ics_alloc_block(spapr->ics, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { - xics_spapr_free(spapr->xics, msi->first_irq, msi->num); + spapr_ics_free(spapr->ics, msi->first_irq, msi->num); g_hash_table_remove(phb->msi, &config_addr); } @@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr, trace_spapr_pci_msi_write(addr, data, irq); - qemu_irq_pulse(xics_get_qirq(spapr->xics, irq)); + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq)); } static const MemoryRegionOps spapr_msi_ops = { @@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) rp->assigned_len = assigned_idx * sizeof(ResourceFields); } +typedef struct PCIClass PCIClass; +typedef struct PCISubClass PCISubClass; +typedef struct PCIIFace PCIIFace; + +struct PCIIFace { + int iface; + const char *name; +}; + +struct PCISubClass { + int subclass; + const char *name; + const PCIIFace *iface; +}; + +struct PCIClass { + const char *name; + const PCISubClass *subc; +}; + +static const PCISubClass undef_subclass[] = { + { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mass_subclass[] = { + { PCI_CLASS_STORAGE_SCSI, "scsi", NULL }, + { PCI_CLASS_STORAGE_IDE, "ide", NULL }, + { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL }, + { PCI_CLASS_STORAGE_IPI, "ipi", NULL }, + { PCI_CLASS_STORAGE_RAID, "raid", NULL }, + { PCI_CLASS_STORAGE_ATA, "ata", NULL }, + { PCI_CLASS_STORAGE_SATA, "sata", NULL }, + { PCI_CLASS_STORAGE_SAS, "sas", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass net_subclass[] = { + { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL }, + { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL }, + { PCI_CLASS_NETWORK_FDDI, "fddi", NULL }, + { PCI_CLASS_NETWORK_ATM, "atm", NULL }, + { PCI_CLASS_NETWORK_ISDN, "isdn", NULL }, + { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL }, + { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass displ_subclass[] = { + { PCI_CLASS_DISPLAY_VGA, "vga", NULL }, + { PCI_CLASS_DISPLAY_XGA, "xga", NULL }, + { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass media_subclass[] = { + { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL }, + { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL }, + { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mem_subclass[] = { + { PCI_CLASS_MEMORY_RAM, "memory", NULL }, + { PCI_CLASS_MEMORY_FLASH, "flash", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass bridg_subclass[] = { + { PCI_CLASS_BRIDGE_HOST, "host", NULL }, + { PCI_CLASS_BRIDGE_ISA, "isa", NULL }, + { PCI_CLASS_BRIDGE_EISA, "eisa", NULL }, + { PCI_CLASS_BRIDGE_MC, "mca", NULL }, + { PCI_CLASS_BRIDGE_PCI, "pci", NULL }, + { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL }, + { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL }, + { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL }, + { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL }, + { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL }, + { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass comm_subclass[] = { + { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL }, + { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL }, + { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL }, + { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL }, + { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL }, + { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL }, + { 0xFF, NULL, NULL, }, +}; + +static const PCIIFace pic_iface[] = { + { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" }, + { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" }, + { 0xFF, NULL }, +}; + +static const PCISubClass sys_subclass[] = { + { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface }, + { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL }, + { PCI_CLASS_SYSTEM_TIMER, "timer", NULL }, + { PCI_CLASS_SYSTEM_RTC, "rtc", NULL }, + { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL }, + { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass inp_subclass[] = { + { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL }, + { PCI_CLASS_INPUT_PEN, "pen", NULL }, + { PCI_CLASS_INPUT_MOUSE, "mouse", NULL }, + { PCI_CLASS_INPUT_SCANNER, "scanner", NULL }, + { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass dock_subclass[] = { + { PCI_CLASS_DOCKING_GENERIC, "dock", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass cpu_subclass[] = { + { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL }, + { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL }, + { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL }, + { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIIFace usb_iface[] = { + { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" }, + { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", }, + { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" }, + { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" }, + { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" }, + { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" }, + { 0xFF, NULL }, +}; + +static const PCISubClass ser_subclass[] = { + { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL }, + { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL }, + { PCI_CLASS_SERIAL_SSA, "ssa", NULL }, + { PCI_CLASS_SERIAL_USB, "usb", usb_iface }, + { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL }, + { PCI_CLASS_SERIAL_SMBUS, "smb", NULL }, + { PCI_CLASS_SERIAL_IB, "infiniband", NULL }, + { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL }, + { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL }, + { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass wrl_subclass[] = { + { PCI_CLASS_WIRELESS_IRDA, "irda", NULL }, + { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL }, + { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL }, + { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL }, + { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass sat_subclass[] = { + { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL }, + { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL }, + { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL }, + { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass crypt_subclass[] = { + { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL }, + { PCI_CLASS_CRYPT_ENTERTAINMENT, + "entertainment-encryption", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass spc_subclass[] = { + { PCI_CLASS_SP_DPIO, "dpio", NULL }, + { PCI_CLASS_SP_PERF, "counter", NULL }, + { PCI_CLASS_SP_SYNCH, "measurement", NULL }, + { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIClass pci_classes[] = { + { "legacy-device", undef_subclass }, + { "mass-storage", mass_subclass }, + { "network", net_subclass }, + { "display", displ_subclass, }, + { "multimedia-device", media_subclass }, + { "memory-controller", mem_subclass }, + { "unknown-bridge", bridg_subclass }, + { "communication-controller", comm_subclass}, + { "system-peripheral", sys_subclass }, + { "input-controller", inp_subclass }, + { "docking-station", dock_subclass }, + { "cpu", cpu_subclass }, + { "serial-bus", ser_subclass }, + { "wireless-controller", wrl_subclass }, + { "intelligent-io", NULL }, + { "satellite-device", sat_subclass }, + { "encryption", crypt_subclass }, + { "data-processing-controller", spc_subclass }, +}; + +static const char *pci_find_device_name(uint8_t class, uint8_t subclass, + uint8_t iface) +{ + const PCIClass *pclass; + const PCISubClass *psubclass; + const PCIIFace *piface; + const char *name; + + if (class >= ARRAY_SIZE(pci_classes)) { + return "pci"; + } + + pclass = pci_classes + class; + name = pclass->name; + + if (pclass->subc == NULL) { + return name; + } + + psubclass = pclass->subc; + while ((psubclass->subclass & 0xff) != 0xff) { + if ((psubclass->subclass & 0xff) == subclass) { + name = psubclass->name; + break; + } + psubclass++; + } + + piface = psubclass->iface; + if (piface == NULL) { + return name; + } + while ((piface->iface & 0xff) != 0xff) { + if ((piface->iface & 0xff) == iface) { + name = piface->name; + break; + } + piface++; + } + + return name; +} + +static void pci_get_node_name(char *nodename, int len, PCIDevice *dev) +{ + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + const char *name; + + name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, + ccode & 0xff); + + if (func != 0) { + snprintf(nodename, len, "%s@%x,%x", name, slot, func); + } else { + snprintf(nodename, len, "%s@%x", name, slot); + } +} + static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, PCIDevice *pdev); @@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, int pci_status, err; char *buf = NULL; uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); uint32_t max_msi, max_msix; if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == @@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, pci_default_read_config(dev, PCI_DEVICE_ID, 2))); _FDT(fdt_setprop_cell(fdt, offset, "revision-id", pci_default_read_config(dev, PCI_REVISION_ID, 1))); - _FDT(fdt_setprop_cell(fdt, offset, "class-code", - pci_default_read_config(dev, PCI_CLASS_PROG, 3))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { _FDT(fdt_setprop_cell(fdt, offset, "interrupts", pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); @@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); } - /* NOTE: this is normally generated by firmware via path/unit name, - * but in our case we must set it manually since it does not get - * processed by OF beforehand - */ - _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); + _FDT(fdt_setprop_string(fdt, offset, "name", + pci_find_device_name((ccode >> 16) & 0xff, + (ccode >> 8) & 0xff, + ccode & 0xff))); buf = spapr_phb_get_loc_code(sphb, dev); if (!buf) { error_report("Failed setting the ibm,loc-code"); @@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, void *fdt, int node_offset) { int offset, ret; - int slot = PCI_SLOT(dev->devfn); - int func = PCI_FUNC(dev->devfn); char nodename[FDT_NAME_MAX]; - if (func != 0) { - snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func); - } else { - snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot); - } + pci_get_node_name(nodename, FDT_NAME_MAX, dev); offset = fdt_add_subnode(fdt, node_offset, nodename); ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb); @@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err); + irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); @@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, s_fdt.fdt = p->fdt; s_fdt.node_off = offset; s_fdt.sphb = p->sphb; - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_populate_pci_devices_dt, - &s_fdt); + pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus), + spapr_populate_pci_devices_dt, + &s_fdt); } static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, @@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, s_fdt.fdt = fdt; s_fdt.node_off = bus_off; s_fdt.sphb = phb; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_populate_pci_devices_dt, - &s_fdt); + pci_for_each_device_reverse(bus, pci_bus_num(bus), + spapr_populate_pci_devices_dt, + &s_fdt); ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), SPAPR_DR_CONNECTOR_TYPE_PCI); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8bfc5f9..a0ee4fd 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err); + dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 2e2664f..7978c7d 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -20,6 +20,7 @@ #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" +#include "qemu/error-report.h" #define KERN_IMAGE_START 0x010000UL #define KERN_PARM_AREA 0x010480UL @@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = { DEFINE_PROP_STRING("initrd", S390IPLState, initrd), DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), + DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, true), @@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) TYPE_VIRTIO_CCW_DEVICE); SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), TYPE_SCSI_DEVICE); + VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), + TYPE_VIRTIO_NET); + + if (vn) { + ipl->netboot = true; + } if (virtio_ccw_dev) { CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev); @@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) return false; } +static int load_netboot_image(Error **errp) +{ + S390IPLState *ipl = get_ipl_device(); + char *netboot_filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = NULL; + void *ram_ptr = NULL; + int img_size = -1; + + mr = memory_region_find(sysmem, 0, 1).mr; + if (!mr) { + error_setg(errp, "Failed to find memory region at address 0"); + return -1; + } + + ram_ptr = memory_region_get_ram_ptr(mr); + if (!ram_ptr) { + error_setg(errp, "No RAM found"); + goto unref_mr; + } + + netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw); + if (netboot_filename == NULL) { + error_setg(errp, "Could not find network bootloader"); + goto unref_mr; + } + + img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr, + NULL, NULL, 1, EM_S390, 0, 0, NULL, false); + + if (img_size < 0) { + img_size = load_image_size(netboot_filename, ram_ptr, ram_size); + ipl->start_addr = KERN_IMAGE_START; + } + + if (img_size < 0) { + error_setg(errp, "Failed to load network bootloader"); + } + + g_free(netboot_filename); + +unref_mr: + memory_region_unref(mr); + return img_size; +} + +static bool is_virtio_net_device(IplParameterBlock *iplb) +{ + uint8_t cssid; + uint8_t ssid; + uint16_t devno; + uint16_t schid; + SubchDev *sch = NULL; + + if (iplb->pbt != S390_IPL_TYPE_CCW) { + return false; + } + + devno = be16_to_cpu(iplb->ccw.devno); + ssid = iplb->ccw.ssid & 3; + + for (schid = 0; schid < MAX_SCHID; schid++) { + for (cssid = 0; cssid < MAX_CSSID; cssid++) { + sch = css_find_subch(1, cssid, ssid, schid); + + if (sch && sch->devno == devno) { + return sch->id.cu_model == VIRTIO_ID_NET; + } + } + } + return false; +} + void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); ipl->iplb = *iplb; ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); } IplParameterBlock *s390_ipl_get_iplb(void) @@ -287,6 +369,7 @@ void s390_reipl_request(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); + Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) ipl->iplb_valid = s390_gen_initial_iplb(ipl); } } + if (ipl->netboot) { + if (load_netboot_image(&err) < 0) { + error_report_err(err); + vm_stop(RUN_STATE_INTERNAL_ERROR); + } + ipl->iplb.ccw.netboot_start_addr = ipl->start_addr; + } } static void s390_ipl_reset(DeviceState *dev) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index c891095..46930e4 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -16,7 +16,8 @@ #include "cpu.h" struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; @@ -100,12 +101,14 @@ struct S390IPLState { IplParameterBlock iplb; bool iplb_valid; bool reipl_requested; + bool netboot; /*< public >*/ char *kernel; char *initrd; char *cmdline; char *firmware; + char *netboot_fw; uint8_t cssid; uint8_t ssid; uint16_t devno; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 4f0d62b..40914fd 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine) /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, "s390-ccw.img", true); + machine->initrd_filename, "s390-ccw.img", + "s390-netboot.img", true); s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 9cfb090..afa4148 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios) { Object *new = object_new(TYPE_S390_IPL); @@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename, } qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); + qdev_prop_set_string(dev, "netboot_fw", netboot_fw); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, new, NULL); diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index f588b80..f2377a3 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios); void s390_create_virtio_net(BusState *bus, const char *name); void s390_nmi(NMIState *n, int cpu_index, Error **errp); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bbfb5dc..a53f058 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque) } } -static void scsi_cd_change_media_cb(void *opaque, bool load) +static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; @@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, + blk_is_read_only(s->qdev.conf.blk), + dev->type == TYPE_DISK, &err); + if (err) { + error_propagate(errp, err); + return; + } if (s->qdev.conf.discard_granularity == -1) { s->qdev.conf.discard_granularity = @@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } s->qdev.blocksize = 2048; diff --git a/hw/sd/core.c b/hw/sd/core.c index 14c2bdf..295dc44 100644 --- a/hw/sd/core.c +++ b/hw/sd/core.c @@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly) } } +void sdbus_reparent_card(SDBus *from, SDBus *to) +{ + SDState *card = get_card(from); + SDCardClass *sc; + bool readonly; + + /* We directly reparent the card object rather than implementing this + * as a hotpluggable connection because we don't want to expose SD cards + * to users as being hotpluggable, and we can get away with it in this + * limited use case. This could perhaps be implemented more cleanly in + * future by adding support to the hotplug infrastructure for "device + * can be hotplugged only via code, not by user". + */ + + if (!card) { + return; + } + + sc = SD_CARD_GET_CLASS(card); + readonly = sc->get_readonly(card); + + sdbus_set_inserted(from, false); + qdev_set_parent_bus(DEVICE(card), &to->qbus); + sdbus_set_inserted(to, true); + sdbus_set_readonly(to, readonly); +} + static const TypeInfo sd_bus_info = { .name = TYPE_SD_BUS, .parent = TYPE_BUS, @@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd) return sd->wp_switch; } -static void sd_cardchange(void *opaque, bool load) +static void sd_cardchange(void *opaque, bool load, Error **errp) { SDState *sd = opaque; DeviceState *dev = DEVICE(sd); @@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj) static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); + int ret; if (sd->blk && blk_is_read_only(sd->blk)) { error_setg(errp, "Cannot use read-only drive as SD card"); @@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp) } if (sd->blk) { + ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } blk_set_dev_ops(sd->blk, &sd_block_ops, sd); } } diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs index fc99668..dd6f27e 100644 --- a/hw/timer/Makefile.objs +++ b/hw/timer/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o +common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o common-obj-$(CONFIG_CADENCE) += cadence_ttc.o common-obj-$(CONFIG_DS1338) += ds1338.o diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c new file mode 100644 index 0000000..df8d280 --- /dev/null +++ b/hw/timer/armv7m_systick.c @@ -0,0 +1,240 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#include "qemu/osdep.h" +#include "hw/timer/armv7m_systick.h" +#include "qemu-common.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "trace.h" + +/* qemu timers run at 1GHz. We want something closer to 1MHz. */ +#define SYSTICK_SCALE 1000ULL + +#define SYSTICK_ENABLE (1 << 0) +#define SYSTICK_TICKINT (1 << 1) +#define SYSTICK_CLKSOURCE (1 << 2) +#define SYSTICK_COUNTFLAG (1 << 16) + +int system_clock_scale; + +/* Conversion factor from qemu timer to SysTick frequencies. */ +static inline int64_t systick_scale(SysTickState *s) +{ + if (s->control & SYSTICK_CLKSOURCE) { + return system_clock_scale; + } else { + return 1000; + } +} + +static void systick_reload(SysTickState *s, int reset) +{ + /* The Cortex-M3 Devices Generic User Guide says that "When the + * ENABLE bit is set to 1, the counter loads the RELOAD value from the + * SYST RVR register and then counts down". So, we need to check the + * ENABLE bit before reloading the value. + */ + trace_systick_reload(); + + if ((s->control & SYSTICK_ENABLE) == 0) { + return; + } + + if (reset) { + s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + s->tick += (s->reload + 1) * systick_scale(s); + timer_mod(s->timer, s->tick); +} + +static void systick_timer_tick(void *opaque) +{ + SysTickState *s = (SysTickState *)opaque; + + trace_systick_timer_tick(); + + s->control |= SYSTICK_COUNTFLAG; + if (s->control & SYSTICK_TICKINT) { + /* Tell the NVIC to pend the SysTick exception */ + qemu_irq_pulse(s->irq); + } + if (s->reload == 0) { + s->control &= ~SYSTICK_ENABLE; + } else { + systick_reload(s, 0); + } +} + +static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) +{ + SysTickState *s = opaque; + uint32_t val; + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + val = s->control; + s->control &= ~SYSTICK_COUNTFLAG; + break; + case 0x4: /* SysTick Reload Value. */ + val = s->reload; + break; + case 0x8: /* SysTick Current Value. */ + { + int64_t t; + + if ((s->control & SYSTICK_ENABLE) == 0) { + val = 0; + break; + } + t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (t >= s->tick) { + val = 0; + break; + } + val = ((s->tick - (t + 1)) / systick_scale(s)) + 1; + /* The interrupt in triggered when the timer reaches zero. + However the counter is not reloaded until the next clock + tick. This is a hack to return zero during the first tick. */ + if (val > s->reload) { + val = 0; + } + break; + } + case 0xc: /* SysTick Calibration Value. */ + val = 10000; + break; + default: + val = 0; + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr); + break; + } + + trace_systick_read(addr, val, size); + return val; +} + +static void systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + SysTickState *s = opaque; + + trace_systick_write(addr, value, size); + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + { + uint32_t oldval = s->control; + + s->control &= 0xfffffff8; + s->control |= value & 7; + if ((oldval ^ value) & SYSTICK_ENABLE) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (value & SYSTICK_ENABLE) { + if (s->tick) { + s->tick += now; + timer_mod(s->timer, s->tick); + } else { + systick_reload(s, 1); + } + } else { + timer_del(s->timer); + s->tick -= now; + if (s->tick < 0) { + s->tick = 0; + } + } + } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + /* This is a hack. Force the timer to be reloaded + when the reference clock is changed. */ + systick_reload(s, 1); + } + break; + } + case 0x4: /* SysTick Reload Value. */ + s->reload = value; + break; + case 0x8: /* SysTick Current Value. Writes reload the timer. */ + systick_reload(s, 1); + s->control &= ~SYSTICK_COUNTFLAG; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps systick_ops = { + .read = systick_read, + .write = systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static void systick_reset(DeviceState *dev) +{ + SysTickState *s = SYSTICK(dev); + + s->control = 0; + s->reload = 0; + s->tick = 0; + timer_del(s->timer); +} + +static void systick_instance_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SysTickState *s = SYSTICK(obj); + + memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); +} + +static const VMStateDescription vmstate_systick = { + .name = "armv7m_systick", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(control, SysTickState), + VMSTATE_UINT32(reload, SysTickState), + VMSTATE_INT64(tick, SysTickState), + VMSTATE_TIMER_PTR(timer, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + +static void systick_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_systick; + dc->reset = systick_reset; +} + +static const TypeInfo armv7m_systick_info = { + .name = TYPE_SYSTICK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = systick_instance_init, + .instance_size = sizeof(SysTickState), + .class_init = systick_class_init, +}; + +static void armv7m_systick_register_types(void) +{ + type_register_static(&armv7m_systick_info); +} + +type_init(armv7m_systick_register_types) diff --git a/hw/timer/trace-events b/hw/timer/trace-events index 3495c41..d17cfe6 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64 + +# hw/timer/armv7m_systick.c +systick_reload(void) "systick reload" +systick_timer_tick(void) "systick reload" +systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/usb/bus.c b/hw/usb/bus.c index efe4b8e..24f1608 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -8,7 +8,6 @@ #include "monitor/monitor.h" #include "trace.h" #include "qemu/cutils.h" -#include "migration/migration.h" static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent); @@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline) const char *params; int len; USBDevice *dev; - ObjectClass *klass; - DeviceClass *dc; params = strchr(cmdline,':'); if (params) { @@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline) return NULL; } - klass = object_class_by_name(f->name); - if (klass == NULL) { - error_report("Device '%s' not found", f->name); - return NULL; - } - - dc = DEVICE_CLASS(klass); - - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_report("Device %s is not migratable, but --only-migratable " - "was specified", f->name); - return NULL; - } - } - if (f->usbdevice_init) { dev = f->usbdevice_init(bus, params); } else { diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c607f76..8a61ec9 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = { .load_request = usb_msd_load_request, }; +static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp) +{ + MSDState *s = USB_STORAGE_DEV(dev); + + object_unref(OBJECT(&s->bus)); +} + static void usb_msd_realize_storage(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) blkconf_serial(&s->conf, &dev->serial); blkconf_blocksizes(&s->conf); - blkconf_apply_backend_options(&s->conf); + blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err); + if (err) { + error_propagate(errp, err); + return; + } /* * Hack alert: this pretends to be a block device, but it's really @@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) s->scsi_dev = scsi_dev; } +static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp) +{ + MSDState *s = USB_STORAGE_DEV(dev); + + object_unref(OBJECT(&s->bus)); +} + static void usb_msd_realize_bot(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_realize_storage; + uc->unrealize = usb_msd_unrealize_storage; dc->props = msd_properties; } @@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_realize_bot; + uc->unrealize = usb_msd_unrealize_bot; uc->attached_settable = true; } diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index 3b26655..fffc424 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -896,6 +896,8 @@ static void usb_uas_unrealize(USBDevice *dev, Error **errp) UASDevice *uas = USB_UAS(dev); qemu_bh_delete(uas->status_bh); + + object_unref(OBJECT(&uas->bus)); } static void usb_uas_realize(USBDevice *dev, Error **errp) diff --git a/include/block/block.h b/include/block/block.h index bde5ebd..c7c4a3a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -82,6 +82,7 @@ typedef struct HDGeometry { } HDGeometry; #define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ @@ -187,6 +188,42 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. + */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, +}; + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new); @@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, @@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + Error **errp); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); diff --git a/include/block/block_int.h b/include/block/block_int.h index 1670941..a57c0bf 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -320,6 +320,59 @@ struct BlockDriver { void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, Error **errp); + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @role. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + QLIST_ENTRY(BlockDriver) list; }; @@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { + /* If true, bdrv_replace_in_backing_chain() doesn't change the node this + * BdrvChild points to. */ + bool stay_at_node; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); @@ -399,6 +456,12 @@ struct BdrvChildRole { * name), or NULL if the parent can't provide a better name. */ const char* (*get_name)(BdrvChild *child); + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. */ + char* (*get_parent_desc)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is @@ -409,16 +472,32 @@ struct BdrvChildRole { */ void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); }; extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +extern const BdrvChildRole child_backing; struct BdrvChild { BlockDriverState *bs; char *name; const BdrvChildRole *role; void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; }; @@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp); + const char *filter_node_name, Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp, bool auto_complete); + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated @@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp); + bool unmap, const char *filter_node_name, Error **errp); /* * backup_job_create: @@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque); + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +void bdrv_child_abort_perm_update(BdrvChild *c); +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to + * all children */ +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * (non-raw) image formats: Like above for bs->backing, but for bs->file it + * requires WRITE | RESIZE for read-write images, always requires + * CONSISTENT_READ and doesn't share WRITE. */ +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 1acb256..9e906f7 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id); /** * block_job_add_bdrv: * @job: A block job + * @name: The name to assign to the new BdrvChild * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node * * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. */ -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); + +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); /** * block_job_set_speed: diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 8223822..3f86cc5 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -119,6 +119,7 @@ struct BlockJobDriver { * generated automatically. * @job_type: The class object for the newly-created job. * @bs: The block + * @perm, @shared_perm: Permissions to request for @bs * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. @@ -134,7 +135,8 @@ struct BlockJobDriver { * called from a wrapper that is specific to the job type. */ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp); /** diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index bd15853..8c305aa 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); +size_t qemu_ram_pagesize_largest(void); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); @@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); #endif diff --git a/include/glib-compat.h b/include/glib-compat.h index 0cd24ff..863c8cf 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -328,4 +328,25 @@ static inline void g_source_set_name_by_id(guint tag, const char *name) #define g_test_subprocess() (0) #endif + +#if !GLIB_CHECK_VERSION(2, 34, 0) +static inline void +g_test_add_data_func_full(const char *path, + gpointer data, + gpointer fn, + gpointer data_free_func) +{ +#if GLIB_CHECK_VERSION(2, 26, 0) + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func); +#else + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (void (*)(void)) fn, (void (*)(void)) data_free_func); +#endif +} +#endif + + #endif diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index c175c0e..a3f79d3 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -26,6 +26,18 @@ typedef enum { /* armv7m.c */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); +/** + * armv7m_load_kernel: + * @cpu: CPU + * @kernel_filename: file to load + * @mem_size: mem_size: maximum image size to load + * + * Load the guest image for an ARMv7M system. This must be called by + * any ARMv7M board, either directly or via armv7m_init(). (This is + * necessary to ensure that the CPU resets correctly on system reset, + * as well as for kernel loading.) + */ +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size); /* * struct used as a parameter of the arm_load_kernel machine init diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h new file mode 100644 index 0000000..a9b3f2a --- /dev/null +++ b/include/hw/arm/armv7m.h @@ -0,0 +1,63 @@ +/* + * ARMv7M CPU object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_H +#define HW_ARM_ARMV7M_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m_nvic.h" + +#define TYPE_BITBAND "ARM,bitband-memory" +#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AddressSpace *source_as; + MemoryRegion iomem; + uint32_t base; + MemoryRegion *source_memory; +} BitBandState; + +#define TYPE_ARMV7M "armv7m" +#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M) + +#define ARMV7M_NUM_BITBANDS 2 + +/* ARMv7M container object. + * + Unnamed GPIO input lines: external IRQ lines for the NVIC + * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ + * + Property "cpu-model": CPU model to instantiate + * + Property "num-irq": number of external IRQ lines + * + Property "memory": MemoryRegion defining the physical address space + * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal + * devices will be automatically layered on top of this view.) + */ +typedef struct ARMv7MState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + NVICState nvic; + BitBandState bitband[ARMV7M_NUM_BITBANDS]; + ARMCPU *cpu; + + /* MemoryRegion we pass to the CPU, with our devices layered on + * top of the ones the board provides in board_memory. + */ + MemoryRegion container; + + /* Properties */ + char *cpu_model; + /* MemoryRegion the board provides to us (with its devices, RAM, etc) */ + MemoryRegion *board_memory; +} ARMv7MState; + +#endif diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h new file mode 100644 index 0000000..1d145fb --- /dev/null +++ b/include/hw/arm/armv7m_nvic.h @@ -0,0 +1,62 @@ +/* + * ARMv7M NVIC object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_NVIC_H +#define HW_ARM_ARMV7M_NVIC_H + +#include "target/arm/cpu.h" +#include "hw/sysbus.h" +#include "hw/timer/armv7m_systick.h" + +#define TYPE_NVIC "armv7m_nvic" + +#define NVIC(obj) \ + OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) + +/* Highest permitted number of exceptions (architectural limit) */ +#define NVIC_MAX_VECTORS 512 + +typedef struct VecInfo { + /* Exception priorities can range from -3 to 255; only the unmodifiable + * priority values for RESET, NMI and HardFault can be negative. + */ + int16_t prio; + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; /* exceptions <=15 never set level */ +} VecInfo; + +typedef struct NVICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMCPU *cpu; + + VecInfo vectors[NVIC_MAX_VECTORS]; + uint32_t prigroup; + + /* vectpending and exception_prio are both cached state that can + * be recalculated from the vectors[] array and the prigroup field. + */ + unsigned int vectpending; /* highest prio pending enabled exception */ + int exception_prio; /* group prio of the highest prio active exception */ + + MemoryRegion sysregmem; + MemoryRegion container; + + uint32_t num_irq; + qemu_irq excpout; + qemu_irq sysresetreq; + + SysTickState systick; +} NVICState; + +#endif diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 31241c7..122b286 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -22,6 +22,8 @@ #include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" #include "hw/sd/sdhci.h" +#include "hw/sd/bcm2835_sdhost.h" +#include "hw/gpio/bcm2835_gpio.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" #define BCM2835_PERIPHERALS(obj) \ @@ -45,6 +47,8 @@ typedef struct BCM2835PeripheralState { BCM2835RngState rng; BCM2835MboxState mboxes; SDHCIState sdhci; + BCM2835SDHostState sdhost; + BCM2835GpioState gpio; } BCM2835PeripheralState; #endif /* BCM2835_PERIPHERALS_H */ diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 1332141..e2dce11 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -31,6 +31,7 @@ #include "hw/adc/stm32f2xx_adc.h" #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" #define TYPE_STM32F205_SOC "stm32f205-soc" #define STM32F205_SOC(obj) \ @@ -51,9 +52,10 @@ typedef struct STM32F205State { SysBusDevice parent_obj; /*< public >*/ - char *kernel_filename; char *cpu_model; + ARMv7MState armv7m; + STM32F2XXSyscfgState syscfg; STM32F2XXUsartState usart[STM_NUM_USARTS]; STM32F2XXTimerState timer[STM_NUM_TIMERS]; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index df9d207..f3f6e8e 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -26,6 +26,7 @@ typedef struct BlockConf { /* geometry, not all devices use this */ uint32_t cyls, heads, secs; OnOffAuto wce; + bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; } BlockConf; @@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ - DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO) + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ @@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); void blkconf_blocksizes(BlockConf *conf); -void blkconf_apply_backend_options(BlockConf *conf); +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp); /* Hard disk geometry */ diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 25659b9..a172a60 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as) + AddressSpace *as, bool load_rom) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd, *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } - snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); - /* rom_add_elf_program() seize the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr, as); + /* rom_add_elf_program() seize the ownership of 'data' */ + rom_add_elf_program(label, data, file_size, mem_size, addr, as); + } else { + cpu_physical_memory_write(addr, data, file_size); + g_free(data); + } total_size += mem_size; if (addr < low) diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h new file mode 100644 index 0000000..9f8e0c7 --- /dev/null +++ b/include/hw/gpio/bcm2835_gpio.h @@ -0,0 +1,39 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_GPIO_H +#define BCM2835_GPIO_H + +#include "hw/sd/sd.h" + +typedef struct BCM2835GpioState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + /* SDBus selector */ + SDBus sdbus; + SDBus *sdbus_sdhci; + SDBus *sdbus_sdhost; + + uint8_t fsel[54]; + uint32_t lev0, lev1; + uint8_t sd_fsel; + qemu_irq out[54]; +} BCM2835GpioState; + +#define TYPE_BCM2835_GPIO "bcm2835_gpio" +#define BCM2835_GPIO(obj) \ + OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO) + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 4156051..bccdfe1 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -172,6 +172,7 @@ struct GICv3CPUState { uint8_t gicr_ipriorityr[GIC_INTERNAL]; /* CPU interface */ + uint64_t icc_sre_el1; uint64_t icc_ctlr_el1[2]; uint64_t icc_pmr_el1; uint64_t icc_bpr[3]; diff --git a/include/hw/loader.h b/include/hw/loader.h index 40c4153..490c9ff 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf_as: +/** load_elf_ram: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -81,6 +81,7 @@ const char *load_elf_strerror(int error); * words and 3 for within doublewords. * @as: The AddressSpace to load the ELF to. The value of address_space_memory * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -93,6 +94,16 @@ const char *load_elf_strerror(int error); * If @elf_machine is EM_NONE then the machine type will be read from the * ELF header and no checks will be carried out against the machine type. */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ int load_elf_as(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 092294e..dfa7614 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -106,7 +106,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq); + return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq); } PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6983f13..9349acb 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -429,6 +429,10 @@ int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), void *opaque); +void pci_for_each_device_reverse(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *bus, PCIDevice *d, + void *opaque), + void *opaque); void pci_for_each_bus_depth_first(PCIBus *bus, void *(*begin)(PCIBus *bus, void *parent_state), void (*end)(PCIBus *bus, void *state), diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index d77ca60..d22ad8d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -13,41 +13,84 @@ /* Device classes and subclasses */ -#define PCI_BASE_CLASS_STORAGE 0x01 -#define PCI_BASE_CLASS_NETWORK 0x02 +#define PCI_CLASS_NOT_DEFINED 0x0000 +#define PCI_CLASS_NOT_DEFINED_VGA 0x0001 +#define PCI_BASE_CLASS_STORAGE 0x01 #define PCI_CLASS_STORAGE_SCSI 0x0100 #define PCI_CLASS_STORAGE_IDE 0x0101 +#define PCI_CLASS_STORAGE_FLOPPY 0x0102 +#define PCI_CLASS_STORAGE_IPI 0x0103 #define PCI_CLASS_STORAGE_RAID 0x0104 +#define PCI_CLASS_STORAGE_ATA 0x0105 #define PCI_CLASS_STORAGE_SATA 0x0106 +#define PCI_CLASS_STORAGE_SAS 0x0107 #define PCI_CLASS_STORAGE_EXPRESS 0x0108 #define PCI_CLASS_STORAGE_OTHER 0x0180 +#define PCI_BASE_CLASS_NETWORK 0x02 #define PCI_CLASS_NETWORK_ETHERNET 0x0200 +#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201 +#define PCI_CLASS_NETWORK_FDDI 0x0202 +#define PCI_CLASS_NETWORK_ATM 0x0203 +#define PCI_CLASS_NETWORK_ISDN 0x0204 +#define PCI_CLASS_NETWORK_WORLDFIP 0x0205 +#define PCI_CLASS_NETWORK_PICMG214 0x0206 #define PCI_CLASS_NETWORK_OTHER 0x0280 +#define PCI_BASE_CLASS_DISPLAY 0x03 #define PCI_CLASS_DISPLAY_VGA 0x0300 +#define PCI_CLASS_DISPLAY_XGA 0x0301 +#define PCI_CLASS_DISPLAY_3D 0x0302 #define PCI_CLASS_DISPLAY_OTHER 0x0380 +#define PCI_BASE_CLASS_MULTIMEDIA 0x04 +#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400 #define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401 +#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402 +#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480 +#define PCI_BASE_CLASS_MEMORY 0x05 #define PCI_CLASS_MEMORY_RAM 0x0500 +#define PCI_CLASS_MEMORY_FLASH 0x0501 +#define PCI_CLASS_MEMORY_OTHER 0x0580 -#define PCI_CLASS_SYSTEM_SDHCI 0x0805 -#define PCI_CLASS_SYSTEM_OTHER 0x0880 - -#define PCI_CLASS_SERIAL_USB 0x0c03 -#define PCI_CLASS_SERIAL_SMBUS 0x0c05 - +#define PCI_BASE_CLASS_BRIDGE 0x06 #define PCI_CLASS_BRIDGE_HOST 0x0600 #define PCI_CLASS_BRIDGE_ISA 0x0601 +#define PCI_CLASS_BRIDGE_EISA 0x0602 +#define PCI_CLASS_BRIDGE_MC 0x0603 #define PCI_CLASS_BRIDGE_PCI 0x0604 #define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01 +#define PCI_CLASS_BRIDGE_PCMCIA 0x0605 +#define PCI_CLASS_BRIDGE_NUBUS 0x0606 +#define PCI_CLASS_BRIDGE_CARDBUS 0x0607 +#define PCI_CLASS_BRIDGE_RACEWAY 0x0608 +#define PCI_CLASS_BRIDGE_PCI_SEMITP 0x0609 +#define PCI_CLASS_BRIDGE_IB_PCI 0x060a #define PCI_CLASS_BRIDGE_OTHER 0x0680 +#define PCI_BASE_CLASS_COMMUNICATION 0x07 #define PCI_CLASS_COMMUNICATION_SERIAL 0x0700 +#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701 +#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702 +#define PCI_CLASS_COMMUNICATION_MODEM 0x0703 +#define PCI_CLASS_COMMUNICATION_GPIB 0x0704 +#define PCI_CLASS_COMMUNICATION_SC 0x0705 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +#define PCI_BASE_CLASS_SYSTEM 0x08 +#define PCI_CLASS_SYSTEM_PIC 0x0800 +#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 +#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020 +#define PCI_CLASS_SYSTEM_DMA 0x0801 +#define PCI_CLASS_SYSTEM_TIMER 0x0802 +#define PCI_CLASS_SYSTEM_RTC 0x0803 +#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 +#define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_OTHER 0x0880 + +#define PCI_BASE_CLASS_INPUT 0x09 #define PCI_CLASS_INPUT_KEYBOARD 0x0900 #define PCI_CLASS_INPUT_PEN 0x0901 #define PCI_CLASS_INPUT_MOUSE 0x0902 @@ -55,8 +98,59 @@ #define PCI_CLASS_INPUT_GAMEPORT 0x0904 #define PCI_CLASS_INPUT_OTHER 0x0980 -#define PCI_CLASS_PROCESSOR_CO 0x0b40 +#define PCI_BASE_CLASS_DOCKING 0x0a +#define PCI_CLASS_DOCKING_GENERIC 0x0a00 +#define PCI_CLASS_DOCKING_OTHER 0x0a80 + +#define PCI_BASE_CLASS_PROCESSOR 0x0b +#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02 #define PCI_CLASS_PROCESSOR_POWERPC 0x0b20 +#define PCI_CLASS_PROCESSOR_MIPS 0x0b30 +#define PCI_CLASS_PROCESSOR_CO 0x0b40 + +#define PCI_BASE_CLASS_SERIAL 0x0c +#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00 +#define PCI_CLASS_SERIAL_ACCESS 0x0c01 +#define PCI_CLASS_SERIAL_SSA 0x0c02 +#define PCI_CLASS_SERIAL_USB 0x0c03 +#define PCI_CLASS_SERIAL_USB_UHCI 0x0c0300 +#define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 +#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 +#define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_UNKNOWN 0x0c0380 +#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe +#define PCI_CLASS_SERIAL_FIBER 0x0c04 +#define PCI_CLASS_SERIAL_SMBUS 0x0c05 +#define PCI_CLASS_SERIAL_IB 0x0c06 +#define PCI_CLASS_SERIAL_IPMI 0x0c07 +#define PCI_CLASS_SERIAL_SERCOS 0x0c08 +#define PCI_CLASS_SERIAL_CANBUS 0x0c09 + +#define PCI_BASE_CLASS_WIRELESS 0x0d +#define PCI_CLASS_WIRELESS_IRDA 0x0d00 +#define PCI_CLASS_WIRELESS_CIR 0x0d01 +#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10 +#define PCI_CLASS_WIRELESS_BLUETOOTH 0x0d11 +#define PCI_CLASS_WIRELESS_BROADBAND 0x0d12 +#define PCI_CLASS_WIRELESS_OTHER 0x0d80 + +#define PCI_BASE_CLASS_SATELLITE 0x0f +#define PCI_CLASS_SATELLITE_TV 0x0f00 +#define PCI_CLASS_SATELLITE_AUDIO 0x0f01 +#define PCI_CLASS_SATELLITE_VOICE 0x0f03 +#define PCI_CLASS_SATELLITE_DATA 0x0f04 + +#define PCI_BASE_CLASS_CRYPT 0x10 +#define PCI_CLASS_CRYPT_NETWORK 0x1000 +#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001 +#define PCI_CLASS_CRYPT_OTHER 0x1080 + +#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11 +#define PCI_CLASS_SP_DPIO 0x1100 +#define PCI_CLASS_SP_PERF 0x1101 +#define PCI_CLASS_SP_SYNCH 0x1110 +#define PCI_CLASS_SP_MANAGEMENT 0x1120 +#define PCI_CLASS_SP_OTHER 0x1180 #define PCI_CLASS_OTHERS 0xff diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index f9b17d8..cfd2711 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -58,7 +58,7 @@ struct sPAPRMachineState { struct VIOsPAPRBus *vio_bus; QLIST_HEAD(, sPAPRPHBState) phbs; struct sPAPRNVRAM *nvram; - XICSState *xics; + ICSState *ics; DeviceState *rtc; void *htab; @@ -94,6 +94,9 @@ struct sPAPRMachineState { /*< public >*/ char *kvm_type; MemoryHotplugState hotplug_memory; + + uint32_t nr_servers; + ICPState *icps; }; #define H_SUCCESS 0 diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index fc6f673..2e9685a 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(spapr->xics, dev->irq); + return xics_get_qirq(XICS_FABRIC(spapr), dev->irq); } static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr, diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 3f0c316..1945913 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -30,29 +30,6 @@ #include "hw/sysbus.h" -#define TYPE_XICS_COMMON "xics-common" -#define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON) - -/* - * Retain xics as the type name to be compatible for migration. Rest all the - * functions, class and variables are renamed as xics_spapr. - */ -#define TYPE_XICS_SPAPR "xics" -#define XICS_SPAPR(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR) - -#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm" -#define XICS_SPAPR_KVM(obj) \ - OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM) - -#define XICS_COMMON_CLASS(klass) \ - OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON) -#define XICS_SPAPR_CLASS(klass) \ - OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR) -#define XICS_COMMON_GET_CLASS(obj) \ - OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON) -#define XICS_SPAPR_GET_CLASS(obj) \ - OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR) - #define XICS_IPI 0x2 #define XICS_BUID 0x1 #define XICS_IRQ_BASE (XICS_BUID << 12) @@ -62,31 +39,12 @@ * (the kernel implementation supports more but we don't exploit * that yet) */ -typedef struct XICSStateClass XICSStateClass; -typedef struct XICSState XICSState; typedef struct ICPStateClass ICPStateClass; typedef struct ICPState ICPState; typedef struct ICSStateClass ICSStateClass; typedef struct ICSState ICSState; typedef struct ICSIRQState ICSIRQState; - -struct XICSStateClass { - DeviceClass parent_class; - - void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu); - void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp); - void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp); -}; - -struct XICSState { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - uint32_t nr_servers; - uint32_t nr_irqs; - ICPState *ss; - QLIST_HEAD(, ICSState) ics; -}; +typedef struct XICSFabric XICSFabric; #define TYPE_ICP "icp" #define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP) @@ -104,6 +62,7 @@ struct ICPStateClass { void (*pre_save)(ICPState *s); int (*post_load)(ICPState *s, int version_id); + void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu); }; struct ICPState { @@ -118,7 +77,7 @@ struct ICPState { qemu_irq output; bool cap_irq_xics_enabled; - XICSState *xics; + XICSFabric *xics; }; #define TYPE_ICS_BASE "ics-base" @@ -139,6 +98,7 @@ struct ICPState { struct ICSStateClass { DeviceClass parent_class; + void (*realize)(DeviceState *dev, Error **errp); void (*pre_save)(ICSState *s); int (*post_load)(ICSState *s, int version_id); void (*reject)(ICSState *s, uint32_t irq); @@ -154,8 +114,7 @@ struct ICSState { uint32_t offset; qemu_irq *qirqs; ICSIRQState *irqs; - XICSState *xics; - QLIST_ENTRY(ICSState) list; + XICSFabric *xics; }; static inline bool ics_valid_irq(ICSState *ics, uint32_t nr) @@ -180,19 +139,37 @@ struct ICSIRQState { uint8_t flags; }; +typedef struct XICSFabric { + Object parent; +} XICSFabric; + +#define TYPE_XICS_FABRIC "xics-fabric" +#define XICS_FABRIC(obj) \ + OBJECT_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC) +#define XICS_FABRIC_CLASS(klass) \ + OBJECT_CLASS_CHECK(XICSFabricClass, (klass), TYPE_XICS_FABRIC) +#define XICS_FABRIC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XICSFabricClass, (obj), TYPE_XICS_FABRIC) + +typedef struct XICSFabricClass { + InterfaceClass parent; + ICSState *(*ics_get)(XICSFabric *xi, int irq); + void (*ics_resend)(XICSFabric *xi); + ICPState *(*icp_get)(XICSFabric *xi, int server); +} XICSFabricClass; + #define XICS_IRQS_SPAPR 1024 -qemu_irq xics_get_qirq(XICSState *icp, int irq); -int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp); -int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align, +int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp); +int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align, Error **errp); -void xics_spapr_free(XICSState *icp, int irq, int num); -void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle); +void spapr_ics_free(ICSState *ics, int irq, int num); +void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle); -void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); -void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu); -void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, - const char *typename, Error **errp); +qemu_irq xics_get_qirq(XICSFabric *xi, int irq); +ICPState *xics_icp_get(XICSFabric *xi, int server); +void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu); +void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu); /* Internal XICS interfaces */ int xics_get_cpu_index_by_dt_id(int cpu_dt_id); @@ -207,7 +184,15 @@ void ics_simple_write_xive(ICSState *ics, int nr, int server, uint8_t priority, uint8_t saved_priority); void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); +void icp_pic_print_info(ICPState *icp, Monitor *mon); +void ics_pic_print_info(ICSState *ics, Monitor *mon); + +void ics_resend(ICSState *ics); +void icp_resend(ICPState *ss); + +typedef struct sPAPRMachineState sPAPRMachineState; -ICSState *xics_find_source(XICSState *icp, int irq); +int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); +int xics_spapr_init(sPAPRMachineState *spapr, Error **errp); #endif /* XICS_H */ diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h index 48cccbd..eafc3f0 100644 --- a/include/hw/ptimer.h +++ b/include/hw/ptimer.h @@ -60,6 +60,7 @@ typedef struct ptimer_state ptimer_state; typedef void (*ptimer_cb)(void *opaque); ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask); +void ptimer_free(ptimer_state *s); void ptimer_set_period(ptimer_state *s, int64_t period); void ptimer_set_freq(ptimer_state *s, uint32_t freq); uint64_t ptimer_get_limit(ptimer_state *s); diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h index 79909b2..96caefe 100644 --- a/include/hw/sd/sd.h +++ b/include/hw/sd/sd.h @@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd); bool sdbus_data_ready(SDBus *sd); bool sdbus_get_inserted(SDBus *sd); bool sdbus_get_readonly(SDBus *sd); +/** + * sdbus_reparent_card: Reparent an SD card from one controller to another + * @from: controller bus to remove card from + * @to: controller bus to move card to + * + * Reparent an SD card, effectively unplugging it from one controller + * and inserting it into another. This is useful for SoCs like the + * bcm2835 which have two SD controllers and connect a single SD card + * to them, selected by the guest reprogramming GPIO line routing. + */ +void sdbus_reparent_card(SDBus *from, SDBus *to); /* Functions to be used by SD devices to report back to qdevified controllers */ void sdbus_set_inserted(SDBus *sd, bool inserted); diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h new file mode 100644 index 0000000..cca04de --- /dev/null +++ b/include/hw/timer/armv7m_systick.h @@ -0,0 +1,34 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#ifndef HW_TIMER_ARMV7M_SYSTICK_H +#define HW_TIMER_ARMV7M_SYSTICK_H + +#include "hw/sysbus.h" + +#define TYPE_SYSTICK "armv7m_systick" + +#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK) + +typedef struct SysTickState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t reload; + int64_t tick; + QEMUTimer *timer; + MemoryRegion iomem; + qemu_irq irq; +} SysTickState; + +#endif diff --git a/include/migration/migration.h b/include/migration/migration.h index 1735d66..5720c88 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -22,6 +22,7 @@ #include "qapi-types.h" #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" +#include "qom/object.h" #define QEMU_VM_FILE_MAGIC 0x5145564d #define QEMU_VM_FILE_VERSION_COMPAT 0x00000002 @@ -92,6 +93,7 @@ struct MigrationIncomingState { */ QemuEvent main_thread_load_event; + size_t largest_page_size; bool have_fault_thread; QemuThread fault_thread; QemuSemaphore fault_thread_sem; @@ -107,6 +109,7 @@ struct MigrationIncomingState { QEMUFile *to_src_file; QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; + void *postcopy_tmp_zero_page; QEMUBH *bh; @@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp); */ void migrate_del_blocker(Error *reason); +int check_migratable(Object *obj, Error **err); + bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); @@ -375,6 +380,7 @@ void global_state_store_running(void); void flush_page_queue(MigrationState *ms); int ram_save_queue_pages(MigrationState *ms, const char *rbname, ram_addr_t start, ram_addr_t len); +uint64_t ram_pagesize_summary(void); PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index b6a7491f..8e036b9 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); /* - * Discard the contents of 'length' bytes from 'start' - * We can assume that if we've been called postcopy_ram_hosttest returned true - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length); - -/* * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard * however leaving it until after precopy means that most of the precopy * data is still THPd @@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms, * to use other postcopy_ routines to allocate. * returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from); +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize); /* * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host); +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize); /* * Allocate a page of memory that can be mapped at a later point in time diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 63e7b02..f2dbf84 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16; extern const VMStateInfo vmstate_info_uint32; extern const VMStateInfo vmstate_info_uint64; +/** Put this in the stream when migrating a null pointer.*/ +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ +extern const VMStateInfo vmstate_info_nullptr; + extern const VMStateInfo vmstate_info_float64; extern const VMStateInfo vmstate_info_cpudouble; diff --git a/include/qemu-io.h b/include/qemu-io.h index 4d402b9..196fde0 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -36,6 +36,7 @@ typedef struct cmdinfo { const char *args; const char *oneline; helpfunc_t help; + uint64_t perm; } cmdinfo_t; extern bool qemuio_misalign; diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9abed51..26e6285 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -610,7 +610,10 @@ void timer_deinit(QEMUTimer *ts); * * Free a timer (it must not be on the active list) */ -void timer_free(QEMUTimer *ts); +static inline void timer_free(QEMUTimer *ts) +{ + g_free(ts); +} /** * timer_del: diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index 47b38fb..eca9a2c 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -73,6 +73,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -144,6 +147,11 @@ */ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +/* + * Crash notification flag. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 5c10f7e..c8b3338 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -640,7 +640,7 @@ * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ -#define KEY_DATA 0x275 +#define KEY_DATA 0x277 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index e5a2e68..634c9c4 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -23,6 +23,14 @@ #define LINUX_PCI_REGS_H /* + * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of + * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of + * configuration space. + */ +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 + +/* * Under PCI, each device has 256 bytes of configuration address space, * of which the first 64 bytes are standardized as follows: */ @@ -674,6 +682,7 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ +#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM @@ -965,6 +974,7 @@ #define PCI_EXP_DPC_STATUS 8 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x01 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x08 /* Interrupt Status */ +#define PCI_EXP_DPC_RP_BUSY 0x10 /* Root Port Busy */ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ @@ -977,4 +987,19 @@ #define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ #define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ +/* L1 PM Substates */ +#define PCI_L1SS_CAP 4 /* capability register */ +#define PCI_L1SS_CAP_PCIPM_L1_2 1 /* PCI PM L1.2 Support */ +#define PCI_L1SS_CAP_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CAP_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CAP_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CAP_L1_PM_SS 16 /* L1 PM Substates Support */ +#define PCI_L1SS_CTL1 8 /* Control Register 1 */ +#define PCI_L1SS_CTL1_PCIPM_L1_2 1 /* PCI PM L1.2 Enable */ +#define PCI_L1SS_CTL1_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000F +#define PCI_L1SS_CTL2 0xC /* Control Register 2 */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index fe74e42..6d5c3b2 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -43,4 +43,5 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ + #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f365a51..096c17f 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -34,7 +34,7 @@ typedef struct BlockDevOps { * changes. Sure would be useful if it did. * Device models with removable media must implement this callback. */ - void (*change_media_cb)(void *opaque, bool load); + void (*change_media_cb)(void *opaque, bool load, Error **errp); /* * Runs when an eject request is issued from the monitor, the tray * is closed, and the medium is locked. @@ -84,7 +84,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(void); +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 2fb7859..1101d55 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ @@ -179,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h new file mode 100644 index 0000000..13a74af --- /dev/null +++ b/linux-headers/asm-arm/unistd-common.h @@ -0,0 +1,357 @@ +#ifndef _ASM_ARM_UNISTD_COMMON_H +#define _ASM_ARM_UNISTD_COMMON_H 1 + +#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0) +#define __NR_exit (__NR_SYSCALL_BASE + 1) +#define __NR_fork (__NR_SYSCALL_BASE + 2) +#define __NR_read (__NR_SYSCALL_BASE + 3) +#define __NR_write (__NR_SYSCALL_BASE + 4) +#define __NR_open (__NR_SYSCALL_BASE + 5) +#define __NR_close (__NR_SYSCALL_BASE + 6) +#define __NR_creat (__NR_SYSCALL_BASE + 8) +#define __NR_link (__NR_SYSCALL_BASE + 9) +#define __NR_unlink (__NR_SYSCALL_BASE + 10) +#define __NR_execve (__NR_SYSCALL_BASE + 11) +#define __NR_chdir (__NR_SYSCALL_BASE + 12) +#define __NR_mknod (__NR_SYSCALL_BASE + 14) +#define __NR_chmod (__NR_SYSCALL_BASE + 15) +#define __NR_lchown (__NR_SYSCALL_BASE + 16) +#define __NR_lseek (__NR_SYSCALL_BASE + 19) +#define __NR_getpid (__NR_SYSCALL_BASE + 20) +#define __NR_mount (__NR_SYSCALL_BASE + 21) +#define __NR_setuid (__NR_SYSCALL_BASE + 23) +#define __NR_getuid (__NR_SYSCALL_BASE + 24) +#define __NR_ptrace (__NR_SYSCALL_BASE + 26) +#define __NR_pause (__NR_SYSCALL_BASE + 29) +#define __NR_access (__NR_SYSCALL_BASE + 33) +#define __NR_nice (__NR_SYSCALL_BASE + 34) +#define __NR_sync (__NR_SYSCALL_BASE + 36) +#define __NR_kill (__NR_SYSCALL_BASE + 37) +#define __NR_rename (__NR_SYSCALL_BASE + 38) +#define __NR_mkdir (__NR_SYSCALL_BASE + 39) +#define __NR_rmdir (__NR_SYSCALL_BASE + 40) +#define __NR_dup (__NR_SYSCALL_BASE + 41) +#define __NR_pipe (__NR_SYSCALL_BASE + 42) +#define __NR_times (__NR_SYSCALL_BASE + 43) +#define __NR_brk (__NR_SYSCALL_BASE + 45) +#define __NR_setgid (__NR_SYSCALL_BASE + 46) +#define __NR_getgid (__NR_SYSCALL_BASE + 47) +#define __NR_geteuid (__NR_SYSCALL_BASE + 49) +#define __NR_getegid (__NR_SYSCALL_BASE + 50) +#define __NR_acct (__NR_SYSCALL_BASE + 51) +#define __NR_umount2 (__NR_SYSCALL_BASE + 52) +#define __NR_ioctl (__NR_SYSCALL_BASE + 54) +#define __NR_fcntl (__NR_SYSCALL_BASE + 55) +#define __NR_setpgid (__NR_SYSCALL_BASE + 57) +#define __NR_umask (__NR_SYSCALL_BASE + 60) +#define __NR_chroot (__NR_SYSCALL_BASE + 61) +#define __NR_ustat (__NR_SYSCALL_BASE + 62) +#define __NR_dup2 (__NR_SYSCALL_BASE + 63) +#define __NR_getppid (__NR_SYSCALL_BASE + 64) +#define __NR_getpgrp (__NR_SYSCALL_BASE + 65) +#define __NR_setsid (__NR_SYSCALL_BASE + 66) +#define __NR_sigaction (__NR_SYSCALL_BASE + 67) +#define __NR_setreuid (__NR_SYSCALL_BASE + 70) +#define __NR_setregid (__NR_SYSCALL_BASE + 71) +#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72) +#define __NR_sigpending (__NR_SYSCALL_BASE + 73) +#define __NR_sethostname (__NR_SYSCALL_BASE + 74) +#define __NR_setrlimit (__NR_SYSCALL_BASE + 75) +#define __NR_getrusage (__NR_SYSCALL_BASE + 77) +#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78) +#define __NR_settimeofday (__NR_SYSCALL_BASE + 79) +#define __NR_getgroups (__NR_SYSCALL_BASE + 80) +#define __NR_setgroups (__NR_SYSCALL_BASE + 81) +#define __NR_symlink (__NR_SYSCALL_BASE + 83) +#define __NR_readlink (__NR_SYSCALL_BASE + 85) +#define __NR_uselib (__NR_SYSCALL_BASE + 86) +#define __NR_swapon (__NR_SYSCALL_BASE + 87) +#define __NR_reboot (__NR_SYSCALL_BASE + 88) +#define __NR_munmap (__NR_SYSCALL_BASE + 91) +#define __NR_truncate (__NR_SYSCALL_BASE + 92) +#define __NR_ftruncate (__NR_SYSCALL_BASE + 93) +#define __NR_fchmod (__NR_SYSCALL_BASE + 94) +#define __NR_fchown (__NR_SYSCALL_BASE + 95) +#define __NR_getpriority (__NR_SYSCALL_BASE + 96) +#define __NR_setpriority (__NR_SYSCALL_BASE + 97) +#define __NR_statfs (__NR_SYSCALL_BASE + 99) +#define __NR_fstatfs (__NR_SYSCALL_BASE + 100) +#define __NR_syslog (__NR_SYSCALL_BASE + 103) +#define __NR_setitimer (__NR_SYSCALL_BASE + 104) +#define __NR_getitimer (__NR_SYSCALL_BASE + 105) +#define __NR_stat (__NR_SYSCALL_BASE + 106) +#define __NR_lstat (__NR_SYSCALL_BASE + 107) +#define __NR_fstat (__NR_SYSCALL_BASE + 108) +#define __NR_vhangup (__NR_SYSCALL_BASE + 111) +#define __NR_wait4 (__NR_SYSCALL_BASE + 114) +#define __NR_swapoff (__NR_SYSCALL_BASE + 115) +#define __NR_sysinfo (__NR_SYSCALL_BASE + 116) +#define __NR_fsync (__NR_SYSCALL_BASE + 118) +#define __NR_sigreturn (__NR_SYSCALL_BASE + 119) +#define __NR_clone (__NR_SYSCALL_BASE + 120) +#define __NR_setdomainname (__NR_SYSCALL_BASE + 121) +#define __NR_uname (__NR_SYSCALL_BASE + 122) +#define __NR_adjtimex (__NR_SYSCALL_BASE + 124) +#define __NR_mprotect (__NR_SYSCALL_BASE + 125) +#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126) +#define __NR_init_module (__NR_SYSCALL_BASE + 128) +#define __NR_delete_module (__NR_SYSCALL_BASE + 129) +#define __NR_quotactl (__NR_SYSCALL_BASE + 131) +#define __NR_getpgid (__NR_SYSCALL_BASE + 132) +#define __NR_fchdir (__NR_SYSCALL_BASE + 133) +#define __NR_bdflush (__NR_SYSCALL_BASE + 134) +#define __NR_sysfs (__NR_SYSCALL_BASE + 135) +#define __NR_personality (__NR_SYSCALL_BASE + 136) +#define __NR_setfsuid (__NR_SYSCALL_BASE + 138) +#define __NR_setfsgid (__NR_SYSCALL_BASE + 139) +#define __NR__llseek (__NR_SYSCALL_BASE + 140) +#define __NR_getdents (__NR_SYSCALL_BASE + 141) +#define __NR__newselect (__NR_SYSCALL_BASE + 142) +#define __NR_flock (__NR_SYSCALL_BASE + 143) +#define __NR_msync (__NR_SYSCALL_BASE + 144) +#define __NR_readv (__NR_SYSCALL_BASE + 145) +#define __NR_writev (__NR_SYSCALL_BASE + 146) +#define __NR_getsid (__NR_SYSCALL_BASE + 147) +#define __NR_fdatasync (__NR_SYSCALL_BASE + 148) +#define __NR__sysctl (__NR_SYSCALL_BASE + 149) +#define __NR_mlock (__NR_SYSCALL_BASE + 150) +#define __NR_munlock (__NR_SYSCALL_BASE + 151) +#define __NR_mlockall (__NR_SYSCALL_BASE + 152) +#define __NR_munlockall (__NR_SYSCALL_BASE + 153) +#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154) +#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155) +#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156) +#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157) +#define __NR_sched_yield (__NR_SYSCALL_BASE + 158) +#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159) +#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160) +#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161) +#define __NR_nanosleep (__NR_SYSCALL_BASE + 162) +#define __NR_mremap (__NR_SYSCALL_BASE + 163) +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_getresuid (__NR_SYSCALL_BASE + 165) +#define __NR_poll (__NR_SYSCALL_BASE + 168) +#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#define __NR_getresgid (__NR_SYSCALL_BASE + 171) +#define __NR_prctl (__NR_SYSCALL_BASE + 172) +#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177) +#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#define __NR_chown (__NR_SYSCALL_BASE + 182) +#define __NR_getcwd (__NR_SYSCALL_BASE + 183) +#define __NR_capget (__NR_SYSCALL_BASE + 184) +#define __NR_capset (__NR_SYSCALL_BASE + 185) +#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186) +#define __NR_sendfile (__NR_SYSCALL_BASE + 187) +#define __NR_vfork (__NR_SYSCALL_BASE + 190) +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#define __NR_mmap2 (__NR_SYSCALL_BASE + 192) +#define __NR_truncate64 (__NR_SYSCALL_BASE + 193) +#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194) +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#define __NR_lstat64 (__NR_SYSCALL_BASE + 196) +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#define __NR_lchown32 (__NR_SYSCALL_BASE + 198) +#define __NR_getuid32 (__NR_SYSCALL_BASE + 199) +#define __NR_getgid32 (__NR_SYSCALL_BASE + 200) +#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201) +#define __NR_getegid32 (__NR_SYSCALL_BASE + 202) +#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203) +#define __NR_setregid32 (__NR_SYSCALL_BASE + 204) +#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205) +#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206) +#define __NR_fchown32 (__NR_SYSCALL_BASE + 207) +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) +#define __NR_chown32 (__NR_SYSCALL_BASE + 212) +#define __NR_setuid32 (__NR_SYSCALL_BASE + 213) +#define __NR_setgid32 (__NR_SYSCALL_BASE + 214) +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#define __NR_pivot_root (__NR_SYSCALL_BASE + 218) +#define __NR_mincore (__NR_SYSCALL_BASE + 219) +#define __NR_madvise (__NR_SYSCALL_BASE + 220) +#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221) +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228) +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231) +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#define __NR_flistxattr (__NR_SYSCALL_BASE + 234) +#define __NR_removexattr (__NR_SYSCALL_BASE + 235) +#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236) +#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237) +#define __NR_tkill (__NR_SYSCALL_BASE + 238) +#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239) +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#define __NR_io_setup (__NR_SYSCALL_BASE + 243) +#define __NR_io_destroy (__NR_SYSCALL_BASE + 244) +#define __NR_io_getevents (__NR_SYSCALL_BASE + 245) +#define __NR_io_submit (__NR_SYSCALL_BASE + 246) +#define __NR_io_cancel (__NR_SYSCALL_BASE + 247) +#define __NR_exit_group (__NR_SYSCALL_BASE + 248) +#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249) +#define __NR_epoll_create (__NR_SYSCALL_BASE + 250) +#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251) +#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252) +#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253) +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#define __NR_timer_create (__NR_SYSCALL_BASE + 257) +#define __NR_timer_settime (__NR_SYSCALL_BASE + 258) +#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259) +#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260) +#define __NR_timer_delete (__NR_SYSCALL_BASE + 261) +#define __NR_clock_settime (__NR_SYSCALL_BASE + 262) +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265) +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#define __NR_tgkill (__NR_SYSCALL_BASE + 268) +#define __NR_utimes (__NR_SYSCALL_BASE + 269) +#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270) +#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271) +#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272) +#define __NR_pciconfig_write (__NR_SYSCALL_BASE + 273) +#define __NR_mq_open (__NR_SYSCALL_BASE + 274) +#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275) +#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276) +#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277) +#define __NR_mq_notify (__NR_SYSCALL_BASE + 278) +#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279) +#define __NR_waitid (__NR_SYSCALL_BASE + 280) +#define __NR_socket (__NR_SYSCALL_BASE + 281) +#define __NR_bind (__NR_SYSCALL_BASE + 282) +#define __NR_connect (__NR_SYSCALL_BASE + 283) +#define __NR_listen (__NR_SYSCALL_BASE + 284) +#define __NR_accept (__NR_SYSCALL_BASE + 285) +#define __NR_getsockname (__NR_SYSCALL_BASE + 286) +#define __NR_getpeername (__NR_SYSCALL_BASE + 287) +#define __NR_socketpair (__NR_SYSCALL_BASE + 288) +#define __NR_send (__NR_SYSCALL_BASE + 289) +#define __NR_sendto (__NR_SYSCALL_BASE + 290) +#define __NR_recv (__NR_SYSCALL_BASE + 291) +#define __NR_recvfrom (__NR_SYSCALL_BASE + 292) +#define __NR_shutdown (__NR_SYSCALL_BASE + 293) +#define __NR_setsockopt (__NR_SYSCALL_BASE + 294) +#define __NR_getsockopt (__NR_SYSCALL_BASE + 295) +#define __NR_sendmsg (__NR_SYSCALL_BASE + 296) +#define __NR_recvmsg (__NR_SYSCALL_BASE + 297) +#define __NR_semop (__NR_SYSCALL_BASE + 298) +#define __NR_semget (__NR_SYSCALL_BASE + 299) +#define __NR_semctl (__NR_SYSCALL_BASE + 300) +#define __NR_msgsnd (__NR_SYSCALL_BASE + 301) +#define __NR_msgrcv (__NR_SYSCALL_BASE + 302) +#define __NR_msgget (__NR_SYSCALL_BASE + 303) +#define __NR_msgctl (__NR_SYSCALL_BASE + 304) +#define __NR_shmat (__NR_SYSCALL_BASE + 305) +#define __NR_shmdt (__NR_SYSCALL_BASE + 306) +#define __NR_shmget (__NR_SYSCALL_BASE + 307) +#define __NR_shmctl (__NR_SYSCALL_BASE + 308) +#define __NR_add_key (__NR_SYSCALL_BASE + 309) +#define __NR_request_key (__NR_SYSCALL_BASE + 310) +#define __NR_keyctl (__NR_SYSCALL_BASE + 311) +#define __NR_semtimedop (__NR_SYSCALL_BASE + 312) +#define __NR_vserver (__NR_SYSCALL_BASE + 313) +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#define __NR_inotify_init (__NR_SYSCALL_BASE + 316) +#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317) +#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318) +#define __NR_mbind (__NR_SYSCALL_BASE + 319) +#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320) +#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321) +#define __NR_openat (__NR_SYSCALL_BASE + 322) +#define __NR_mkdirat (__NR_SYSCALL_BASE + 323) +#define __NR_mknodat (__NR_SYSCALL_BASE + 324) +#define __NR_fchownat (__NR_SYSCALL_BASE + 325) +#define __NR_futimesat (__NR_SYSCALL_BASE + 326) +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#define __NR_unlinkat (__NR_SYSCALL_BASE + 328) +#define __NR_renameat (__NR_SYSCALL_BASE + 329) +#define __NR_linkat (__NR_SYSCALL_BASE + 330) +#define __NR_symlinkat (__NR_SYSCALL_BASE + 331) +#define __NR_readlinkat (__NR_SYSCALL_BASE + 332) +#define __NR_fchmodat (__NR_SYSCALL_BASE + 333) +#define __NR_faccessat (__NR_SYSCALL_BASE + 334) +#define __NR_pselect6 (__NR_SYSCALL_BASE + 335) +#define __NR_ppoll (__NR_SYSCALL_BASE + 336) +#define __NR_unshare (__NR_SYSCALL_BASE + 337) +#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338) +#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339) +#define __NR_splice (__NR_SYSCALL_BASE + 340) +#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341) +#define __NR_tee (__NR_SYSCALL_BASE + 342) +#define __NR_vmsplice (__NR_SYSCALL_BASE + 343) +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) +#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346) +#define __NR_kexec_load (__NR_SYSCALL_BASE + 347) +#define __NR_utimensat (__NR_SYSCALL_BASE + 348) +#define __NR_signalfd (__NR_SYSCALL_BASE + 349) +#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350) +#define __NR_eventfd (__NR_SYSCALL_BASE + 351) +#define __NR_fallocate (__NR_SYSCALL_BASE + 352) +#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353) +#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354) +#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355) +#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356) +#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357) +#define __NR_dup3 (__NR_SYSCALL_BASE + 358) +#define __NR_pipe2 (__NR_SYSCALL_BASE + 359) +#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360) +#define __NR_preadv (__NR_SYSCALL_BASE + 361) +#define __NR_pwritev (__NR_SYSCALL_BASE + 362) +#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363) +#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364) +#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365) +#define __NR_accept4 (__NR_SYSCALL_BASE + 366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369) +#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370) +#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371) +#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372) +#define __NR_syncfs (__NR_SYSCALL_BASE + 373) +#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374) +#define __NR_setns (__NR_SYSCALL_BASE + 375) +#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376) +#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377) +#define __NR_kcmp (__NR_SYSCALL_BASE + 378) +#define __NR_finit_module (__NR_SYSCALL_BASE + 379) +#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381) +#define __NR_renameat2 (__NR_SYSCALL_BASE + 382) +#define __NR_seccomp (__NR_SYSCALL_BASE + 383) +#define __NR_getrandom (__NR_SYSCALL_BASE + 384) +#define __NR_memfd_create (__NR_SYSCALL_BASE + 385) +#define __NR_bpf (__NR_SYSCALL_BASE + 386) +#define __NR_execveat (__NR_SYSCALL_BASE + 387) +#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) +#define __NR_membarrier (__NR_SYSCALL_BASE + 389) +#define __NR_mlock2 (__NR_SYSCALL_BASE + 390) +#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391) +#define __NR_preadv2 (__NR_SYSCALL_BASE + 392) +#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) +#define __NR_pkey_free (__NR_SYSCALL_BASE + 396) + +#endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h new file mode 100644 index 0000000..266f1fc --- /dev/null +++ b/linux-headers/asm-arm/unistd-eabi.h @@ -0,0 +1,5 @@ +#ifndef _ASM_ARM_UNISTD_EABI_H +#define _ASM_ARM_UNISTD_EABI_H 1 + + +#endif /* _ASM_ARM_UNISTD_EABI_H */ diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h new file mode 100644 index 0000000..47d9afb --- /dev/null +++ b/linux-headers/asm-arm/unistd-oabi.h @@ -0,0 +1,17 @@ +#ifndef _ASM_ARM_UNISTD_OABI_H +#define _ASM_ARM_UNISTD_OABI_H 1 + +#define __NR_time (__NR_SYSCALL_BASE + 13) +#define __NR_umount (__NR_SYSCALL_BASE + 22) +#define __NR_stime (__NR_SYSCALL_BASE + 25) +#define __NR_alarm (__NR_SYSCALL_BASE + 27) +#define __NR_utime (__NR_SYSCALL_BASE + 30) +#define __NR_getrlimit (__NR_SYSCALL_BASE + 76) +#define __NR_select (__NR_SYSCALL_BASE + 82) +#define __NR_readdir (__NR_SYSCALL_BASE + 89) +#define __NR_mmap (__NR_SYSCALL_BASE + 90) +#define __NR_socketcall (__NR_SYSCALL_BASE + 102) +#define __NR_syscall (__NR_SYSCALL_BASE + 113) +#define __NR_ipc (__NR_SYSCALL_BASE + 117) + +#endif /* _ASM_ARM_UNISTD_OABI_H */ diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h index ceb5450..155571b 100644 --- a/linux-headers/asm-arm/unistd.h +++ b/linux-headers/asm-arm/unistd.h @@ -17,409 +17,14 @@ #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 +#include <asm/unistd-eabi.h> #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE +#include <asm/unistd-oabi.h> #endif -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall (__NR_SYSCALL_BASE+ 0) -#define __NR_exit (__NR_SYSCALL_BASE+ 1) -#define __NR_fork (__NR_SYSCALL_BASE+ 2) -#define __NR_read (__NR_SYSCALL_BASE+ 3) -#define __NR_write (__NR_SYSCALL_BASE+ 4) -#define __NR_open (__NR_SYSCALL_BASE+ 5) -#define __NR_close (__NR_SYSCALL_BASE+ 6) - /* 7 was sys_waitpid */ -#define __NR_creat (__NR_SYSCALL_BASE+ 8) -#define __NR_link (__NR_SYSCALL_BASE+ 9) -#define __NR_unlink (__NR_SYSCALL_BASE+ 10) -#define __NR_execve (__NR_SYSCALL_BASE+ 11) -#define __NR_chdir (__NR_SYSCALL_BASE+ 12) -#define __NR_time (__NR_SYSCALL_BASE+ 13) -#define __NR_mknod (__NR_SYSCALL_BASE+ 14) -#define __NR_chmod (__NR_SYSCALL_BASE+ 15) -#define __NR_lchown (__NR_SYSCALL_BASE+ 16) - /* 17 was sys_break */ - /* 18 was sys_stat */ -#define __NR_lseek (__NR_SYSCALL_BASE+ 19) -#define __NR_getpid (__NR_SYSCALL_BASE+ 20) -#define __NR_mount (__NR_SYSCALL_BASE+ 21) -#define __NR_umount (__NR_SYSCALL_BASE+ 22) -#define __NR_setuid (__NR_SYSCALL_BASE+ 23) -#define __NR_getuid (__NR_SYSCALL_BASE+ 24) -#define __NR_stime (__NR_SYSCALL_BASE+ 25) -#define __NR_ptrace (__NR_SYSCALL_BASE+ 26) -#define __NR_alarm (__NR_SYSCALL_BASE+ 27) - /* 28 was sys_fstat */ -#define __NR_pause (__NR_SYSCALL_BASE+ 29) -#define __NR_utime (__NR_SYSCALL_BASE+ 30) - /* 31 was sys_stty */ - /* 32 was sys_gtty */ -#define __NR_access (__NR_SYSCALL_BASE+ 33) -#define __NR_nice (__NR_SYSCALL_BASE+ 34) - /* 35 was sys_ftime */ -#define __NR_sync (__NR_SYSCALL_BASE+ 36) -#define __NR_kill (__NR_SYSCALL_BASE+ 37) -#define __NR_rename (__NR_SYSCALL_BASE+ 38) -#define __NR_mkdir (__NR_SYSCALL_BASE+ 39) -#define __NR_rmdir (__NR_SYSCALL_BASE+ 40) -#define __NR_dup (__NR_SYSCALL_BASE+ 41) -#define __NR_pipe (__NR_SYSCALL_BASE+ 42) -#define __NR_times (__NR_SYSCALL_BASE+ 43) - /* 44 was sys_prof */ -#define __NR_brk (__NR_SYSCALL_BASE+ 45) -#define __NR_setgid (__NR_SYSCALL_BASE+ 46) -#define __NR_getgid (__NR_SYSCALL_BASE+ 47) - /* 48 was sys_signal */ -#define __NR_geteuid (__NR_SYSCALL_BASE+ 49) -#define __NR_getegid (__NR_SYSCALL_BASE+ 50) -#define __NR_acct (__NR_SYSCALL_BASE+ 51) -#define __NR_umount2 (__NR_SYSCALL_BASE+ 52) - /* 53 was sys_lock */ -#define __NR_ioctl (__NR_SYSCALL_BASE+ 54) -#define __NR_fcntl (__NR_SYSCALL_BASE+ 55) - /* 56 was sys_mpx */ -#define __NR_setpgid (__NR_SYSCALL_BASE+ 57) - /* 58 was sys_ulimit */ - /* 59 was sys_olduname */ -#define __NR_umask (__NR_SYSCALL_BASE+ 60) -#define __NR_chroot (__NR_SYSCALL_BASE+ 61) -#define __NR_ustat (__NR_SYSCALL_BASE+ 62) -#define __NR_dup2 (__NR_SYSCALL_BASE+ 63) -#define __NR_getppid (__NR_SYSCALL_BASE+ 64) -#define __NR_getpgrp (__NR_SYSCALL_BASE+ 65) -#define __NR_setsid (__NR_SYSCALL_BASE+ 66) -#define __NR_sigaction (__NR_SYSCALL_BASE+ 67) - /* 68 was sys_sgetmask */ - /* 69 was sys_ssetmask */ -#define __NR_setreuid (__NR_SYSCALL_BASE+ 70) -#define __NR_setregid (__NR_SYSCALL_BASE+ 71) -#define __NR_sigsuspend (__NR_SYSCALL_BASE+ 72) -#define __NR_sigpending (__NR_SYSCALL_BASE+ 73) -#define __NR_sethostname (__NR_SYSCALL_BASE+ 74) -#define __NR_setrlimit (__NR_SYSCALL_BASE+ 75) -#define __NR_getrlimit (__NR_SYSCALL_BASE+ 76) /* Back compat 2GB limited rlimit */ -#define __NR_getrusage (__NR_SYSCALL_BASE+ 77) -#define __NR_gettimeofday (__NR_SYSCALL_BASE+ 78) -#define __NR_settimeofday (__NR_SYSCALL_BASE+ 79) -#define __NR_getgroups (__NR_SYSCALL_BASE+ 80) -#define __NR_setgroups (__NR_SYSCALL_BASE+ 81) -#define __NR_select (__NR_SYSCALL_BASE+ 82) -#define __NR_symlink (__NR_SYSCALL_BASE+ 83) - /* 84 was sys_lstat */ -#define __NR_readlink (__NR_SYSCALL_BASE+ 85) -#define __NR_uselib (__NR_SYSCALL_BASE+ 86) -#define __NR_swapon (__NR_SYSCALL_BASE+ 87) -#define __NR_reboot (__NR_SYSCALL_BASE+ 88) -#define __NR_readdir (__NR_SYSCALL_BASE+ 89) -#define __NR_mmap (__NR_SYSCALL_BASE+ 90) -#define __NR_munmap (__NR_SYSCALL_BASE+ 91) -#define __NR_truncate (__NR_SYSCALL_BASE+ 92) -#define __NR_ftruncate (__NR_SYSCALL_BASE+ 93) -#define __NR_fchmod (__NR_SYSCALL_BASE+ 94) -#define __NR_fchown (__NR_SYSCALL_BASE+ 95) -#define __NR_getpriority (__NR_SYSCALL_BASE+ 96) -#define __NR_setpriority (__NR_SYSCALL_BASE+ 97) - /* 98 was sys_profil */ -#define __NR_statfs (__NR_SYSCALL_BASE+ 99) -#define __NR_fstatfs (__NR_SYSCALL_BASE+100) - /* 101 was sys_ioperm */ -#define __NR_socketcall (__NR_SYSCALL_BASE+102) -#define __NR_syslog (__NR_SYSCALL_BASE+103) -#define __NR_setitimer (__NR_SYSCALL_BASE+104) -#define __NR_getitimer (__NR_SYSCALL_BASE+105) -#define __NR_stat (__NR_SYSCALL_BASE+106) -#define __NR_lstat (__NR_SYSCALL_BASE+107) -#define __NR_fstat (__NR_SYSCALL_BASE+108) - /* 109 was sys_uname */ - /* 110 was sys_iopl */ -#define __NR_vhangup (__NR_SYSCALL_BASE+111) - /* 112 was sys_idle */ -#define __NR_syscall (__NR_SYSCALL_BASE+113) /* syscall to call a syscall! */ -#define __NR_wait4 (__NR_SYSCALL_BASE+114) -#define __NR_swapoff (__NR_SYSCALL_BASE+115) -#define __NR_sysinfo (__NR_SYSCALL_BASE+116) -#define __NR_ipc (__NR_SYSCALL_BASE+117) -#define __NR_fsync (__NR_SYSCALL_BASE+118) -#define __NR_sigreturn (__NR_SYSCALL_BASE+119) -#define __NR_clone (__NR_SYSCALL_BASE+120) -#define __NR_setdomainname (__NR_SYSCALL_BASE+121) -#define __NR_uname (__NR_SYSCALL_BASE+122) - /* 123 was sys_modify_ldt */ -#define __NR_adjtimex (__NR_SYSCALL_BASE+124) -#define __NR_mprotect (__NR_SYSCALL_BASE+125) -#define __NR_sigprocmask (__NR_SYSCALL_BASE+126) - /* 127 was sys_create_module */ -#define __NR_init_module (__NR_SYSCALL_BASE+128) -#define __NR_delete_module (__NR_SYSCALL_BASE+129) - /* 130 was sys_get_kernel_syms */ -#define __NR_quotactl (__NR_SYSCALL_BASE+131) -#define __NR_getpgid (__NR_SYSCALL_BASE+132) -#define __NR_fchdir (__NR_SYSCALL_BASE+133) -#define __NR_bdflush (__NR_SYSCALL_BASE+134) -#define __NR_sysfs (__NR_SYSCALL_BASE+135) -#define __NR_personality (__NR_SYSCALL_BASE+136) - /* 137 was sys_afs_syscall */ -#define __NR_setfsuid (__NR_SYSCALL_BASE+138) -#define __NR_setfsgid (__NR_SYSCALL_BASE+139) -#define __NR__llseek (__NR_SYSCALL_BASE+140) -#define __NR_getdents (__NR_SYSCALL_BASE+141) -#define __NR__newselect (__NR_SYSCALL_BASE+142) -#define __NR_flock (__NR_SYSCALL_BASE+143) -#define __NR_msync (__NR_SYSCALL_BASE+144) -#define __NR_readv (__NR_SYSCALL_BASE+145) -#define __NR_writev (__NR_SYSCALL_BASE+146) -#define __NR_getsid (__NR_SYSCALL_BASE+147) -#define __NR_fdatasync (__NR_SYSCALL_BASE+148) -#define __NR__sysctl (__NR_SYSCALL_BASE+149) -#define __NR_mlock (__NR_SYSCALL_BASE+150) -#define __NR_munlock (__NR_SYSCALL_BASE+151) -#define __NR_mlockall (__NR_SYSCALL_BASE+152) -#define __NR_munlockall (__NR_SYSCALL_BASE+153) -#define __NR_sched_setparam (__NR_SYSCALL_BASE+154) -#define __NR_sched_getparam (__NR_SYSCALL_BASE+155) -#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156) -#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157) -#define __NR_sched_yield (__NR_SYSCALL_BASE+158) -#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159) -#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160) -#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161) -#define __NR_nanosleep (__NR_SYSCALL_BASE+162) -#define __NR_mremap (__NR_SYSCALL_BASE+163) -#define __NR_setresuid (__NR_SYSCALL_BASE+164) -#define __NR_getresuid (__NR_SYSCALL_BASE+165) - /* 166 was sys_vm86 */ - /* 167 was sys_query_module */ -#define __NR_poll (__NR_SYSCALL_BASE+168) -#define __NR_nfsservctl (__NR_SYSCALL_BASE+169) -#define __NR_setresgid (__NR_SYSCALL_BASE+170) -#define __NR_getresgid (__NR_SYSCALL_BASE+171) -#define __NR_prctl (__NR_SYSCALL_BASE+172) -#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173) -#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176) -#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177) -#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179) -#define __NR_pread64 (__NR_SYSCALL_BASE+180) -#define __NR_pwrite64 (__NR_SYSCALL_BASE+181) -#define __NR_chown (__NR_SYSCALL_BASE+182) -#define __NR_getcwd (__NR_SYSCALL_BASE+183) -#define __NR_capget (__NR_SYSCALL_BASE+184) -#define __NR_capset (__NR_SYSCALL_BASE+185) -#define __NR_sigaltstack (__NR_SYSCALL_BASE+186) -#define __NR_sendfile (__NR_SYSCALL_BASE+187) - /* 188 reserved */ - /* 189 reserved */ -#define __NR_vfork (__NR_SYSCALL_BASE+190) -#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191) /* SuS compliant getrlimit */ -#define __NR_mmap2 (__NR_SYSCALL_BASE+192) -#define __NR_truncate64 (__NR_SYSCALL_BASE+193) -#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194) -#define __NR_stat64 (__NR_SYSCALL_BASE+195) -#define __NR_lstat64 (__NR_SYSCALL_BASE+196) -#define __NR_fstat64 (__NR_SYSCALL_BASE+197) -#define __NR_lchown32 (__NR_SYSCALL_BASE+198) -#define __NR_getuid32 (__NR_SYSCALL_BASE+199) -#define __NR_getgid32 (__NR_SYSCALL_BASE+200) -#define __NR_geteuid32 (__NR_SYSCALL_BASE+201) -#define __NR_getegid32 (__NR_SYSCALL_BASE+202) -#define __NR_setreuid32 (__NR_SYSCALL_BASE+203) -#define __NR_setregid32 (__NR_SYSCALL_BASE+204) -#define __NR_getgroups32 (__NR_SYSCALL_BASE+205) -#define __NR_setgroups32 (__NR_SYSCALL_BASE+206) -#define __NR_fchown32 (__NR_SYSCALL_BASE+207) -#define __NR_setresuid32 (__NR_SYSCALL_BASE+208) -#define __NR_getresuid32 (__NR_SYSCALL_BASE+209) -#define __NR_setresgid32 (__NR_SYSCALL_BASE+210) -#define __NR_getresgid32 (__NR_SYSCALL_BASE+211) -#define __NR_chown32 (__NR_SYSCALL_BASE+212) -#define __NR_setuid32 (__NR_SYSCALL_BASE+213) -#define __NR_setgid32 (__NR_SYSCALL_BASE+214) -#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216) -#define __NR_getdents64 (__NR_SYSCALL_BASE+217) -#define __NR_pivot_root (__NR_SYSCALL_BASE+218) -#define __NR_mincore (__NR_SYSCALL_BASE+219) -#define __NR_madvise (__NR_SYSCALL_BASE+220) -#define __NR_fcntl64 (__NR_SYSCALL_BASE+221) - /* 222 for tux */ - /* 223 is unused */ -#define __NR_gettid (__NR_SYSCALL_BASE+224) -#define __NR_readahead (__NR_SYSCALL_BASE+225) -#define __NR_setxattr (__NR_SYSCALL_BASE+226) -#define __NR_lsetxattr (__NR_SYSCALL_BASE+227) -#define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -#define __NR_getxattr (__NR_SYSCALL_BASE+229) -#define __NR_lgetxattr (__NR_SYSCALL_BASE+230) -#define __NR_fgetxattr (__NR_SYSCALL_BASE+231) -#define __NR_listxattr (__NR_SYSCALL_BASE+232) -#define __NR_llistxattr (__NR_SYSCALL_BASE+233) -#define __NR_flistxattr (__NR_SYSCALL_BASE+234) -#define __NR_removexattr (__NR_SYSCALL_BASE+235) -#define __NR_lremovexattr (__NR_SYSCALL_BASE+236) -#define __NR_fremovexattr (__NR_SYSCALL_BASE+237) -#define __NR_tkill (__NR_SYSCALL_BASE+238) -#define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -#define __NR_futex (__NR_SYSCALL_BASE+240) -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242) -#define __NR_io_setup (__NR_SYSCALL_BASE+243) -#define __NR_io_destroy (__NR_SYSCALL_BASE+244) -#define __NR_io_getevents (__NR_SYSCALL_BASE+245) -#define __NR_io_submit (__NR_SYSCALL_BASE+246) -#define __NR_io_cancel (__NR_SYSCALL_BASE+247) -#define __NR_exit_group (__NR_SYSCALL_BASE+248) -#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249) -#define __NR_epoll_create (__NR_SYSCALL_BASE+250) -#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251) -#define __NR_epoll_wait (__NR_SYSCALL_BASE+252) -#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253) - /* 254 for set_thread_area */ - /* 255 for get_thread_area */ -#define __NR_set_tid_address (__NR_SYSCALL_BASE+256) -#define __NR_timer_create (__NR_SYSCALL_BASE+257) -#define __NR_timer_settime (__NR_SYSCALL_BASE+258) -#define __NR_timer_gettime (__NR_SYSCALL_BASE+259) -#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260) -#define __NR_timer_delete (__NR_SYSCALL_BASE+261) -#define __NR_clock_settime (__NR_SYSCALL_BASE+262) -#define __NR_clock_gettime (__NR_SYSCALL_BASE+263) -#define __NR_clock_getres (__NR_SYSCALL_BASE+264) -#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265) -#define __NR_statfs64 (__NR_SYSCALL_BASE+266) -#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267) -#define __NR_tgkill (__NR_SYSCALL_BASE+268) -#define __NR_utimes (__NR_SYSCALL_BASE+269) -#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270) -#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271) -#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272) -#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273) -#define __NR_mq_open (__NR_SYSCALL_BASE+274) -#define __NR_mq_unlink (__NR_SYSCALL_BASE+275) -#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276) -#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277) -#define __NR_mq_notify (__NR_SYSCALL_BASE+278) -#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) -#define __NR_waitid (__NR_SYSCALL_BASE+280) -#define __NR_socket (__NR_SYSCALL_BASE+281) -#define __NR_bind (__NR_SYSCALL_BASE+282) -#define __NR_connect (__NR_SYSCALL_BASE+283) -#define __NR_listen (__NR_SYSCALL_BASE+284) -#define __NR_accept (__NR_SYSCALL_BASE+285) -#define __NR_getsockname (__NR_SYSCALL_BASE+286) -#define __NR_getpeername (__NR_SYSCALL_BASE+287) -#define __NR_socketpair (__NR_SYSCALL_BASE+288) -#define __NR_send (__NR_SYSCALL_BASE+289) -#define __NR_sendto (__NR_SYSCALL_BASE+290) -#define __NR_recv (__NR_SYSCALL_BASE+291) -#define __NR_recvfrom (__NR_SYSCALL_BASE+292) -#define __NR_shutdown (__NR_SYSCALL_BASE+293) -#define __NR_setsockopt (__NR_SYSCALL_BASE+294) -#define __NR_getsockopt (__NR_SYSCALL_BASE+295) -#define __NR_sendmsg (__NR_SYSCALL_BASE+296) -#define __NR_recvmsg (__NR_SYSCALL_BASE+297) -#define __NR_semop (__NR_SYSCALL_BASE+298) -#define __NR_semget (__NR_SYSCALL_BASE+299) -#define __NR_semctl (__NR_SYSCALL_BASE+300) -#define __NR_msgsnd (__NR_SYSCALL_BASE+301) -#define __NR_msgrcv (__NR_SYSCALL_BASE+302) -#define __NR_msgget (__NR_SYSCALL_BASE+303) -#define __NR_msgctl (__NR_SYSCALL_BASE+304) -#define __NR_shmat (__NR_SYSCALL_BASE+305) -#define __NR_shmdt (__NR_SYSCALL_BASE+306) -#define __NR_shmget (__NR_SYSCALL_BASE+307) -#define __NR_shmctl (__NR_SYSCALL_BASE+308) -#define __NR_add_key (__NR_SYSCALL_BASE+309) -#define __NR_request_key (__NR_SYSCALL_BASE+310) -#define __NR_keyctl (__NR_SYSCALL_BASE+311) -#define __NR_semtimedop (__NR_SYSCALL_BASE+312) -#define __NR_vserver (__NR_SYSCALL_BASE+313) -#define __NR_ioprio_set (__NR_SYSCALL_BASE+314) -#define __NR_ioprio_get (__NR_SYSCALL_BASE+315) -#define __NR_inotify_init (__NR_SYSCALL_BASE+316) -#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317) -#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318) -#define __NR_mbind (__NR_SYSCALL_BASE+319) -#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320) -#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321) -#define __NR_openat (__NR_SYSCALL_BASE+322) -#define __NR_mkdirat (__NR_SYSCALL_BASE+323) -#define __NR_mknodat (__NR_SYSCALL_BASE+324) -#define __NR_fchownat (__NR_SYSCALL_BASE+325) -#define __NR_futimesat (__NR_SYSCALL_BASE+326) -#define __NR_fstatat64 (__NR_SYSCALL_BASE+327) -#define __NR_unlinkat (__NR_SYSCALL_BASE+328) -#define __NR_renameat (__NR_SYSCALL_BASE+329) -#define __NR_linkat (__NR_SYSCALL_BASE+330) -#define __NR_symlinkat (__NR_SYSCALL_BASE+331) -#define __NR_readlinkat (__NR_SYSCALL_BASE+332) -#define __NR_fchmodat (__NR_SYSCALL_BASE+333) -#define __NR_faccessat (__NR_SYSCALL_BASE+334) -#define __NR_pselect6 (__NR_SYSCALL_BASE+335) -#define __NR_ppoll (__NR_SYSCALL_BASE+336) -#define __NR_unshare (__NR_SYSCALL_BASE+337) -#define __NR_set_robust_list (__NR_SYSCALL_BASE+338) -#define __NR_get_robust_list (__NR_SYSCALL_BASE+339) -#define __NR_splice (__NR_SYSCALL_BASE+340) -#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341) +#include <asm/unistd-common.h> #define __NR_sync_file_range2 __NR_arm_sync_file_range -#define __NR_tee (__NR_SYSCALL_BASE+342) -#define __NR_vmsplice (__NR_SYSCALL_BASE+343) -#define __NR_move_pages (__NR_SYSCALL_BASE+344) -#define __NR_getcpu (__NR_SYSCALL_BASE+345) -#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346) -#define __NR_kexec_load (__NR_SYSCALL_BASE+347) -#define __NR_utimensat (__NR_SYSCALL_BASE+348) -#define __NR_signalfd (__NR_SYSCALL_BASE+349) -#define __NR_timerfd_create (__NR_SYSCALL_BASE+350) -#define __NR_eventfd (__NR_SYSCALL_BASE+351) -#define __NR_fallocate (__NR_SYSCALL_BASE+352) -#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353) -#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354) -#define __NR_signalfd4 (__NR_SYSCALL_BASE+355) -#define __NR_eventfd2 (__NR_SYSCALL_BASE+356) -#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357) -#define __NR_dup3 (__NR_SYSCALL_BASE+358) -#define __NR_pipe2 (__NR_SYSCALL_BASE+359) -#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360) -#define __NR_preadv (__NR_SYSCALL_BASE+361) -#define __NR_pwritev (__NR_SYSCALL_BASE+362) -#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) -#define __NR_recvmmsg (__NR_SYSCALL_BASE+365) -#define __NR_accept4 (__NR_SYSCALL_BASE+366) -#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) -#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) -#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) -#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370) -#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371) -#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) -#define __NR_syncfs (__NR_SYSCALL_BASE+373) -#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) -#define __NR_setns (__NR_SYSCALL_BASE+375) -#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) -#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) -#define __NR_kcmp (__NR_SYSCALL_BASE+378) -#define __NR_finit_module (__NR_SYSCALL_BASE+379) -#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) -#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) -#define __NR_renameat2 (__NR_SYSCALL_BASE+382) -#define __NR_seccomp (__NR_SYSCALL_BASE+383) -#define __NR_getrandom (__NR_SYSCALL_BASE+384) -#define __NR_memfd_create (__NR_SYSCALL_BASE+385) -#define __NR_bpf (__NR_SYSCALL_BASE+386) -#define __NR_execveat (__NR_SYSCALL_BASE+387) -#define __NR_userfaultfd (__NR_SYSCALL_BASE+388) -#define __NR_membarrier (__NR_SYSCALL_BASE+389) -#define __NR_mlock2 (__NR_SYSCALL_BASE+390) -#define __NR_copy_file_range (__NR_SYSCALL_BASE+391) -#define __NR_preadv2 (__NR_SYSCALL_BASE+392) -#define __NR_pwritev2 (__NR_SYSCALL_BASE+393) /* * The following SWIs are ARM private. @@ -431,22 +36,4 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) -/* - * The following syscalls are obsolete and no longer available for EABI. - */ -#if defined(__ARM_EABI__) -#undef __NR_time -#undef __NR_umount -#undef __NR_stime -#undef __NR_alarm -#undef __NR_utime -#undef __NR_getrlimit -#undef __NR_select -#undef __NR_readdir -#undef __NR_mmap -#undef __NR_socketcall -#undef __NR_syscall -#undef __NR_ipc -#endif - #endif /* __ASM_ARM_UNISTD_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index fd5a276..651ec30 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c93cf35..4edbe4b 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -573,6 +593,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +620,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ @@ -608,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 1e66eba..598043c 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index e41c5c1..3a53979 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -45,7 +45,18 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; +}; + +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index bb0ed71..4e082a8 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -651,6 +652,9 @@ struct kvm_enable_cap { }; /* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + struct kvm_ppc_pvinfo { /* out */ __u32 flags; @@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; #define KVMIO 0xAE @@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef KVM_CAP_IRQ_ROUTING @@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* Available with KVM_CAP_PPC_RTAS */ #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index e61661e..15b24ff 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG #define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 @@ -23,6 +24,7 @@ #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 /* * hypercalls use architecture specific diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 19e8453..2ed5dc3 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -11,13 +11,18 @@ #include <linux/types.h> -#define UFFD_API ((__u64)0xAA) /* - * After implementing the respective features it will become: - * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - * UFFD_FEATURE_EVENT_FORK) + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). */ -#define UFFD_API_FEATURES (0) +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -26,6 +31,9 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY) /* * Valid ioctl command number range with this API is from 0x00 to @@ -72,6 +80,21 @@ struct uffd_msg { } pagefault; struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } madv_dn; + + struct { /* unused reserved fields */ __u64 reserved1; __u64 reserved2; @@ -84,9 +107,9 @@ struct uffd_msg { * Start at 0x12 and not at 0 to be more strict against bugs. */ #define UFFD_EVENT_PAGEFAULT 0x12 -#if 0 /* not available yet */ #define UFFD_EVENT_FORK 0x13 -#endif +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_MADVDONTNEED 0x15 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -104,11 +127,37 @@ struct uffdio_api { * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). */ -#if 0 /* not available yet */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) -#endif +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 759b850..531cb2e 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -203,6 +203,16 @@ struct vfio_device_info { }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) diff --git a/migration/block.c b/migration/block.c index ebc10e6..1941bc2 100644 --- a/migration/block.c +++ b/migration/block.c @@ -379,7 +379,7 @@ static void unset_dirty_tracking(void) } } -static void init_blk_migration(QEMUFile *f) +static int init_blk_migration(QEMUFile *f) { BlockDriverState *bs; BlkMigDevState *bmds; @@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f) BlkMigDevState *bmds; BlockDriverState *bs; } *bmds_bs; + Error *local_err = NULL; + int ret; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { + ret = sectors; goto out; } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(); + bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs); + ret = blk_insert_bs(bmds->blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto out; + } alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); @@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f) } } + ret = 0; out: g_free(bmds_bs); + return ret; } /* Called with no lock taken. */ @@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); - init_blk_migration(f); + ret = init_blk_migration(f); + if (ret < 0) { + qemu_mutex_unlock_iothread(); + return ret; + } /* start track dirty blocks */ ret = set_dirty_tracking(); diff --git a/migration/colo.c b/migration/colo.c index 712308e..c19eb3f 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "migration/failover.h" +#include "replication.h" +#include "qmp-commands.h" static bool vmstate_loading; @@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s) } } +void qmp_xen_set_replication(bool enable, bool primary, + bool has_failover, bool failover, + Error **errp) +{ + ReplicationMode mode = primary ? + REPLICATION_MODE_PRIMARY : + REPLICATION_MODE_SECONDARY; + + if (has_failover && enable) { + error_setg(errp, "Parameter 'failover' is only for" + " stopping replication"); + return; + } + + if (enable) { + replication_start_all(mode, errp); + } else { + if (!has_failover) { + failover = NULL; + } + replication_stop_all(failover, failover ? NULL : errp); + } +} + +ReplicationStatus *qmp_query_xen_replication_status(Error **errp) +{ + Error *err = NULL; + ReplicationStatus *s = g_new0(ReplicationStatus, 1); + + replication_get_error_all(&err); + if (err) { + s->error = true; + s->has_desc = true; + s->desc = g_strdup(error_get_pretty(err)); + } else { + s->error = false; + } + + error_free(err); + return s; +} + +void qmp_xen_colo_do_checkpoint(Error **errp) +{ + replication_do_checkpoint_all(errp); +} + static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { diff --git a/migration/migration.c b/migration/migration.c index c6ae69d..3dab684 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -49,6 +49,10 @@ * for sending the last part */ #define DEFAULT_MIGRATE_SET_DOWNTIME 300 +/* Maximum migrate downtime set to 2000 seconds */ +#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + /* Default compression thread count */ #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 /* Default decompression thread count, usually decompression is at @@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque) int ret; mis->from_src_file = f; + mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); @@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) return; } if (params->has_downtime_limit && - (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "downtime_limit", - "an integer in the range of 0 to 2000000 milliseconds"); + (params->downtime_limit < 0 || + params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d milliseconds", + MAX_MIGRATE_DOWNTIME); return; } if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { @@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } +int check_migratable(Object *obj, Error **err) +{ + DeviceClass *dc = DEVICE_GET_CLASS(obj); + if (only_migratable && dc->vmsd) { + if (dc->vmsd->unmigratable) { + error_setg(err, "Device %s is not migratable, but " + "--only-migratable was specified", + object_get_typename(obj)); + return -1; + } + } + + return 0; +} + void qmp_migrate_incoming(const char *uri, Error **errp) { Error *local_err = NULL; @@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error **errp) { + if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d seconds", + MAX_MIGRATE_DOWNTIME_SECONDS); + return; + } + value *= 1000; /* Convert to milliseconds */ value = MAX(0, MIN(INT64_MAX, value)); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a40dddb..effbeb6 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd) return false; } - return true; -} - -/* - * Check for things that postcopy won't support; returns 0 if the block - * is fine. - */ -static int check_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) -{ - RAMBlock *rb = qemu_ram_block_by_name(block_name); - - if (qemu_ram_pagesize(rb) > getpagesize()) { - error_report("Postcopy doesn't support large page sizes yet (%s)", - block_name); - return -E2BIG; + if (getpagesize() != ram_pagesize_summary()) { + bool have_hp = false; + /* We've got a huge page */ +#ifdef UFFD_FEATURE_MISSING_HUGETLBFS + have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; +#endif + if (!have_hp) { + error_report("Userfault on this host does not support huge pages"); + return false; + } } - - return 0; + return true; } /* @@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void) goto out; } - /* Check for anything about the RAMBlocks we don't support */ - if (qemu_ram_foreach_block(check_range, NULL)) { - /* check_range will have printed its own error */ - goto out; - } - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, @@ -200,27 +187,6 @@ out: return ret; } -/** - * postcopy_ram_discard_range: Discard a range of memory. - * We can assume that if we've been called postcopy_ram_hosttest returned true. - * - * @mis: Current incoming migration state. - * @start, @length: range of memory to discard. - * - * returns: 0 on success. - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - trace_postcopy_ram_discard_range(start, length); - if (madvise(start, length, MADV_DONTNEED)) { - error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); - return -1; - } - - return 0; -} - /* * Setup an area of RAM so that it *can* be used for postcopy later; this * must be done right at the start prior to pre-copy. @@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. * (Precopy will just overwrite this data, so doesn't need the discard) */ - if (postcopy_ram_discard_range(mis, host_addr, length)) { + if (ram_discard_range(mis, block_name, 0, length)) { return -1; } @@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, getpagesize()); + munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; } + if (mis->postcopy_tmp_zero_page) { + munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); + mis->postcopy_tmp_zero_page = NULL; + } trace_postcopy_ram_incoming_cleanup_exit(); return 0; } @@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + error_report("%s userfault: Region doesn't support COPY", __func__); + return -1; + } return 0; } @@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; - size_t hostpagesize = getpagesize(); RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ @@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque) break; } - rb_offset &= ~(hostpagesize - 1); + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset); @@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque) if (rb != last_rb) { last_rb = rb; migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } else { /* Save some space */ migrate_send_rp_req_pages(mis, NULL, - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } } trace_postcopy_ram_fault_thread_exit(); @@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Place a host page (from) at (host) atomically * returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = getpagesize(); + copy_struct.len = pagesize; copy_struct.mode = 0; /* copy also acks to the kernel waking the stalled thread up @@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) */ if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { int e = errno; - error_report("%s: %s copy host: %p from: %p", - __func__, strerror(e), host, from); + error_report("%s: %s copy host: %p from: %p (size: %zd)", + __func__, strerror(e), host, from, pagesize); return -e; } @@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { - struct uffdio_zeropage zero_struct; + trace_postcopy_place_page_zero(host); - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; + if (pagesize == getpagesize()) { + struct uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host; + zero_struct.range.len = getpagesize(); + zero_struct.mode = 0; - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { - int e = errno; - error_report("%s: %s zero host: %p", - __func__, strerror(e), host); + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + int e = errno; + error_report("%s: %s zero host: %p", + __func__, strerror(e), host); - return -e; + return -e; + } + } else { + /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ + if (!mis->postcopy_tmp_zero_page) { + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: %s mapping large zero page", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + } + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, + pagesize); } - trace_postcopy_place_page_zero(host); return 0; } @@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) void *postcopy_get_tmp_page(MigrationIncomingState *mis) { if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, getpagesize(), + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mis->postcopy_tmp_page == MAP_FAILED) { @@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - assert(0); - return -1; -} - int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { assert(0); @@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { assert(0); return -1; } -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { assert(0); return -1; diff --git a/migration/ram.c b/migration/ram.c index f289fcd..719425b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void) iterations_prev = 0; } +/* Returns a summary bitmap of the page sizes of all RAMBlocks; + * for VMs with just normal pages this is equivalent to the + * host page size. If it's got some huge pages then it's the OR + * of all the different page sizes. + */ +uint64_t ram_pagesize_summary(void) +{ + RAMBlock *block; + uint64_t summary = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + summary |= block->page_size; + } + + return summary; +} + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, * offset to point into the middle of a host page * in which case the remainder of the hostpage is sent. * Only dirty target pages are sent. + * Note that the host page size may be a huge page for this + * block. * * Returns: Number of pages written. * @@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, ram_addr_t dirty_ram_abs) { int tmppages, pages = 0; + size_t pagesize = qemu_ram_pagesize(pss->block); + do { tmppages = ram_save_target_page(ms, f, pss, last_stage, bytes_transferred, dirty_ram_abs); @@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, pages += tmppages; pss->offset += TARGET_PAGE_SIZE; dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (qemu_host_page_size - 1)); + } while (pss->offset & (pagesize - 1)); /* The offset we leave with is the last one we looked at */ pss->offset -= TARGET_PAGE_SIZE; @@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, { unsigned long *bitmap; unsigned long *unsentmap; - unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; unsigned long first = block->offset >> TARGET_PAGE_BITS; unsigned long len = block->used_length >> TARGET_PAGE_BITS; unsigned long last = first + (len - 1); unsigned long run_start; + if (block->page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS for a non-huge page RAMBlock */ + return; + } + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; @@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially - * dirty host-page size chunks as all dirty. + * dirty host-page size chunks as all dirty. In this case the host-page + * is the host-page for the particular RAMBlock, i.e. it might be a huge page * * Returns: 0 on success */ @@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms) { struct RAMBlock *block; - if (qemu_host_page_size == TARGET_PAGE_SIZE) { - /* Easy case - TPS==HPS - nothing to be done */ - return 0; - } - /* Easiest way to make sure we don't resume in the middle of a host-page */ last_seen_block = NULL; last_sent_block = NULL; @@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return -EINVAL; } - /* Deal with TPS != HPS */ + /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms); if (ret) { rcu_read_unlock(); @@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis, { int ret = -1; + trace_ram_discard_range(block_name, start, length); + rcu_read_lock(); RAMBlock *rb = qemu_ram_block_by_name(block_name); @@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis, goto err; } - uint8_t *host_startaddr = rb->host + start; - - if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned start address: %p", - host_startaddr); - goto err; - } - - if ((start + length) <= rb->used_length) { - uint8_t *host_endaddr = host_startaddr + length; - if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned end address: %p", - host_endaddr); - goto err; - } - ret = postcopy_ram_discard_range(mis, host_startaddr, length); - } else { - error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 - "/%zx/" RAM_ADDR_FMT")", - block_name, start, length, rb->used_length); - } + ret = ram_block_discard_range(rb, start, length); err: rcu_read_unlock(); @@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } } rcu_read_unlock(); @@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f) { int flags = 0, ret = 0; bool place_needed = false; - bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + bool matching_page_sizes = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = postcopy_get_tmp_page(mis); @@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f) void *host = NULL; void *page_buffer = NULL; void *place_source = NULL; + RAMBlock *block = NULL; uint8_t ch; addr = qemu_get_be64(f); @@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { - RAMBlock *block = ram_block_from_stream(f, flags); + block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); if (!host) { @@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; /* - * Postcopy requires that we place whole host pages atomically. + * Postcopy requires that we place whole host pages atomically; + * these may be huge pages for RAMBlocks that are backed by + * hugetlbfs. * To make it atomic, the data is read into a temporary page * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages @@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f) * of a host page in order. */ page_buffer = postcopy_host_page + - ((uintptr_t)host & ~qemu_host_page_mask); + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & ~qemu_host_page_mask)) { + if (!((uintptr_t)host & (block->page_size - 1))) { all_zero = true; } else { /* not the 1st TP within the HP */ @@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f) * page */ place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - ~qemu_host_page_mask) == 0; + (block->page_size - 1)) == 0; place_source = postcopy_host_page; } last_host = host; @@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f) if (place_needed) { /* This gets called at the last target page in the host page */ + void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + if (all_zero) { - ret = postcopy_place_page_zero(mis, - host + TARGET_PAGE_SIZE - - qemu_host_page_size); + ret = postcopy_place_page_zero(mis, place_dest, + block->page_size); } else { - ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - - qemu_host_page_size, - place_source); + ret = postcopy_place_page(mis, place_dest, + place_source, block->page_size); } } if (!ret) { @@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * be atomic */ bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; seq_iter++; @@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); } } + /* For postcopy we need to check hugepage sizes match */ + if (postcopy_advised && + block->page_size != qemu_host_page_size) { + uint64_t remote_page_size = qemu_get_be64(f); + if (remote_page_size != block->page_size) { + error_report("Mismatched RAM page size %s " + "(local) %zd != %" PRId64, + id, block->page_size, + remote_page_size); + ret = -EINVAL; + } + } ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, block->idstr); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 5ecd264..3b19a4a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, return -1; } + g_free(id); se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); @@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; - tmp[0] = cpu_to_be64(getpagesize()); + tmp[0] = cpu_to_be64(ram_pagesize_summary()); tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); trace_qemu_savevm_send_postcopy_advise(); @@ -1276,6 +1277,11 @@ done: status = MIGRATION_STATUS_COMPLETED; } migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status); + + /* f is outer parameter, it should not stay in global migration state after + * this function finished */ + ms->to_dst_file = NULL; + return ret; } @@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); - uint64_t remote_hps, remote_tps; + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } - remote_hps = qemu_get_be64(mis->from_src_file); - if (remote_hps != getpagesize()) { + remote_pagesize_summary = qemu_get_be64(mis->from_src_file); + local_pagesize_summary = ram_pagesize_summary(); + + if (remote_pagesize_summary != local_pagesize_summary) { /* - * Some combinations of mismatch are probably possible but it gets - * a bit more complicated. In particular we need to place whole - * host pages on the dest at once, and we need to ensure that we - * handle dirtying to make sure we never end up sending part of - * a hostpage on it's own. + * This detects two potential causes of mismatch: + * a) A mismatch in host page sizes + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on it's own. + * b) The use of different huge page sizes on source/destination + * a more fine grain test is performed during RAM block migration + * but this test here causes a nice early clear failure, and + * also fails when passed to an older qemu that doesn't + * do huge pages. */ - error_report("Postcopy needs matching host page sizes (s=%d d=%d)", - (int)remote_hps, getpagesize()); + error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64 + " d=%" PRIx64 ")", + remote_pagesize_summary, local_pagesize_summary); return -1; } diff --git a/migration/trace-events b/migration/trace-events index fa660e3..7372ce2 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" +ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" ram_postcopy_send_discard_bitmap(void) "" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx" @@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" -postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" diff --git a/migration/vmstate.c b/migration/vmstate.c index b4d8ae9..78b3cd4 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc) +static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) { - void *base_addr = opaque + field->offset; - - if (field->flags & VMS_POINTER) { - if (alloc && (field->flags & VMS_ALLOC)) { - gsize size = 0; - if (field->flags & VMS_VBUFFER) { - size = vmstate_size(opaque, field); - } else { - int n_elems = vmstate_n_elems(opaque, field); - if (n_elems) { - size = n_elems * field->size; - } - } - if (size) { - *(void **)base_addr = g_malloc(size); - } + if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { + gsize size = vmstate_size(opaque, field); + size *= vmstate_n_elems(opaque, field); + if (size) { + *(void **)ptr = g_malloc(size); } - base_addr = *(void **)base_addr; } - - return base_addr; } int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, true); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); + vmstate_handle_alloc(first_elem, field, opaque); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - ret = vmstate_load_state(f, field->vmsd, addr, + if (!curr_elem) { + /* if null pointer check placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = vmstate_load_state(f, field->vmsd, curr_elem, field->vmsd->version_id); } else { - ret = field->info->get(f, addr, size, field); + ret = field->info->get(f, curr_elem, size, field); } if (ret >= 0) { ret = qemu_file_get_error(f); @@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, false); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); int64_t old_offset, written_bytes; QJSON *vmdesc_loop = vmdesc; trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); old_offset = qemu_ftell_fast(f); - if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + assert(curr_elem); + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - vmstate_save_state(f, field->vmsd, addr, vmdesc_loop); + if (!curr_elem) { + /* if null pointer write placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL); + } else if (field->flags & VMS_STRUCT) { + vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop); } else { - field->info->put(f, addr, size, field, vmdesc_loop); + field->info->put(f, curr_elem, size, field, vmdesc_loop); } written_bytes = qemu_ftell_fast(f) - old_offset; @@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field) + +{ + if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) { + return 0; + } + error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); + return -EINVAL; +} + +static int put_nullptr(QEMUFile *f, void *pv, size_t size, + VMStateField *field, QJSON *vmdesc) + +{ + if (pv == NULL) { + qemu_put_byte(f, VMS_NULLPTR_MARKER); + return 0; + } + error_report("vmstate: put_nullptr must be called with pv == NULL"); + return -EINVAL; +} + +const VMStateInfo vmstate_info_nullptr = { + .name = "uint64", + .get = get_nullptr, + .put = put_nullptr, +}; + /* 64 bit unsigned int. See that the received value is the same than the one in the field */ @@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void) #ifndef TARGET_ARM qmp_unregister_command("query-gic-capabilities"); #endif -#if !defined(TARGET_S390X) +#if !defined(TARGET_S390X) && !defined(TARGET_I386) qmp_unregister_command("query-cpu-model-expansion"); +#endif +#if !defined(TARGET_S390X) qmp_unregister_command("query-cpu-model-baseline"); qmp_unregister_command("query-cpu-model-comparison"); #endif diff --git a/nbd/server.c b/nbd/server.c index ac92fa0..924a1fe 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + uint64_t perm; + int ret; - blk = blk_new(); - blk_insert_bs(blk, bs); + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. */ + perm = BLK_PERM_CONSISTENT_READ; + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } + blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } blk_set_enable_write_cache(blk, !writethrough); exp->refcount = 1; diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex 229b5af..18666c9 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex 9a9b0f0..a394411 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex 95f1167..4869c9d 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex 675968e..aada55e 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex d4b9532..cf466f6 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex cf05bf0..2a4adfa 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 611102e..b21c877 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -724,11 +724,17 @@ static void zipl_load_vscsi(void) void zipl_load(void) { - if (virtio_get_device()->is_cdrom) { + VDev *vdev = virtio_get_device(); + + if (vdev->is_cdrom) { ipl_iso_el_torito(); panic("\n! Cannot IPL this ISO image !\n"); } + if (virtio_get_device_type() == VIRTIO_ID_NET) { + jump_to_IPL_code(vdev->netboot_start_addr); + } + ipl_scsi(); switch (virtio_get_device_type()) { diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 86abc56..890aed9 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -13,7 +13,8 @@ #define IPLB_H struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 345b848..0946766 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no) if (!virtio_is_supported(blk_schid)) { continue; } + /* Skip net devices since no IPLB is created and therefore no + * no network bootloader has been loaded + */ + if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { + continue; + } if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { return true; } @@ -67,6 +73,7 @@ static void virtio_setup(void) int ssid; bool found = false; uint16_t dev_no; + VDev *vdev = virtio_get_device(); /* * We unconditionally enable mss support. In every sane configuration, @@ -85,9 +92,6 @@ static void virtio_setup(void) found = find_dev(&schib, dev_no); break; case S390_IPL_TYPE_QEMU_SCSI: - { - VDev *vdev = virtio_get_device(); - vdev->scsi_device_selected = true; vdev->selected_scsi_device.channel = iplb.scsi.channel; vdev->selected_scsi_device.target = iplb.scsi.target; @@ -95,7 +99,6 @@ static void virtio_setup(void) blk_schid.ssid = iplb.scsi.ssid & 0x3; found = find_dev(&schib, iplb.scsi.devno); break; - } default: panic("List-directed IPL not supported yet!\n"); } @@ -111,9 +114,14 @@ static void virtio_setup(void) IPL_assert(found, "No virtio device found"); - virtio_setup_device(blk_schid); + if (virtio_get_device_type() == VIRTIO_ID_NET) { + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = iplb.ccw.netboot_start_addr; + } else { + virtio_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } } int main(void) diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index b333734..6ee93d5 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid) switch (vdev.senseid.cu_model) { case VIRTIO_ID_BLOCK: case VIRTIO_ID_SCSI: + case VIRTIO_ID_NET: return true; } } diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index eb35ea5..3388a42 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -276,6 +276,7 @@ struct VDev { uint8_t scsi_dev_heads; bool scsi_device_selected; ScsiDevice selected_scsi_device; + uint64_t netboot_start_addr; }; typedef struct VDev VDev; diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 9dadce2..e6c42bd 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex a89725c..915eba7 100644 --- a/pc-bios/vgabios-qxl.bin +++ b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex ea04141..40eca8c 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex 71e22fc..8b3abfa 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex ad239cb..6a90b94 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex 9947c2c..d3ed89d 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/qapi-schema.json b/qapi-schema.json index 150ee98..5eb1b8b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4274,6 +4274,15 @@ # migration-safe, but allows tooling to get an insight and work with # model details. # +# Note: When a non-migration-safe CPU model is expanded in static mode, some +# features enabled by the CPU model may be omitted, because they can't be +# implemented by a static CPU model definition (e.g. cache info passthrough and +# PMU passthrough in x86). If you need an accurate representation of the +# features enabled by a non-migration-safe CPU model, use @full. If you need a +# static representation that will keep ABI compatibility even when changing QEMU +# version or machine-type, use @static (but keep in mind that some features may +# be omitted). +# # Since: 2.8.0 ## { 'enum': 'CpuModelExpansionType', @@ -5990,6 +5999,79 @@ { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} } ## +# @xen-set-replication: +# +# Enable or disable replication. +# +# @enable: true to enable, false to disable. +# +# @primary: true for primary or false for secondary. +# +# @failover: #optional true to do failover, false to stop. but cannot be +# specified if 'enable' is true. default value is false. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-set-replication", +# "arguments": {"enable": true, "primary": false} } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-set-replication', + 'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } } + +## +# @ReplicationStatus: +# +# The result format for 'query-xen-replication-status'. +# +# @error: true if an error happened, false if replication is normal. +# +# @desc: #optional the human readable error description string, when +# @error is 'true'. +# +# Since: 2.9 +## +{ 'struct': 'ReplicationStatus', + 'data': { 'error': 'bool', '*desc': 'str' } } + +## +# @query-xen-replication-status: +# +# Query replication status while the vm is running. +# +# Returns: A @ReplicationResult object showing the status. +# +# Example: +# +# -> { "execute": "query-xen-replication-status" } +# <- { "return": { "error": false } } +# +# Since: 2.9 +## +{ 'command': 'query-xen-replication-status', + 'returns': 'ReplicationStatus' } + +## +# @xen-colo-do-checkpoint: +# +# Xen uses this command to notify replication to trigger a checkpoint. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-colo-do-checkpoint" } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-colo-do-checkpoint' } + +## # @GICCapability: # # The struct describes capability for a specific GIC (Generic diff --git a/qapi/block-core.json b/qapi/block-core.json index cf24c04..5cc992f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1304,6 +1304,11 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the commit job inserts into the graph +# above @top. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: Nothing on success # If commit or stream is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound @@ -1323,7 +1328,8 @@ ## { 'command': 'block-commit', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str' } } ## # @drive-backup: @@ -1671,6 +1677,11 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the mirror job inserts into the graph +# above @device. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: nothing on success. # # Since: 2.6 @@ -1690,7 +1701,8 @@ 'sync': 'MirrorSyncMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str' } } ## # @block_set_io_throttle: diff --git a/qdev-monitor.c b/qdev-monitor.c index 549f45f..5f2fcdf 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -29,7 +29,6 @@ #include "qemu/error-report.h" #include "qemu/help_option.h" #include "sysemu/block-backend.h" -#include "migration/migration.h" /* * Aliases were a bad idea from the start. Let's keep them @@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) return NULL; } - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_setg(errp, "Device %s is not migratable, but " - "--only-migratable was specified", driver); - return NULL; - } - } - /* find bus */ path = qemu_opt_get(opts, "bus"); if (path != NULL) { diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index f054599..9c9702c 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, @@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void) " kinds of errors, with a higher risk of choosing the wrong fix or\n" " hiding corruption that has already occurred.\n" "\n" + "Parameters to convert subcommand:\n" + " '-m' specifies how many coroutines work in parallel during the convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" + "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" " '-a' applies a snapshot (revert disk to saved state)\n" @@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + /* FIXME In error cases, the job simply goes away and we access a dangling + * pointer below. */ aio_context_acquire(aio_context); do { aio_poll(aio_context, true); @@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv) const char *filename, *fmt, *cache, *base; BlockBackend *blk; BlockDriverState *bs, *base_bs; + BlockJob *job; bool progress = false, quiet = false, drop = false; bool writethrough; Error *local_err = NULL; @@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, - BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, - &local_err, false); + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, &local_err, false); aio_context_release(aio_context); if (local_err) { goto done; @@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv) bdrv_ref(bs); } - run_block_job(bs->job, &local_err); + job = block_job_get("commit"); + run_block_job(job, &local_err); if (local_err) { goto unref_backing; } @@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus { BLK_BACKING_FILE, }; +#define MAX_COROUTINES 16 + typedef struct ImgConvertState { BlockBackend **src; int64_t *src_sectors; - int src_cur, src_num; - int64_t src_cur_offset; + int src_num; int64_t total_sectors; int64_t allocated_sectors; + int64_t allocated_done; + int64_t sector_num; + int64_t wr_offs; enum ImgConvertBlockStatus status; int64_t sector_next_status; BlockBackend *target; bool has_zero_init; bool compressed; bool target_has_backing; + bool wr_in_order; int min_sparse; size_t cluster_sectors; size_t buf_sectors; + int num_coroutines; + int running_coroutines; + Coroutine *co[MAX_COROUTINES]; + int64_t wait_sector_num[MAX_COROUTINES]; + CoMutex lock; + int ret; } ImgConvertState; -static void convert_select_part(ImgConvertState *s, int64_t sector_num) +static void convert_select_part(ImgConvertState *s, int64_t sector_num, + int *src_cur, int64_t *src_cur_offset) { - assert(sector_num >= s->src_cur_offset); - while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) { - s->src_cur_offset += s->src_sectors[s->src_cur]; - s->src_cur++; - assert(s->src_cur < s->src_num); + *src_cur = 0; + *src_cur_offset = 0; + while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) { + *src_cur_offset += s->src_sectors[*src_cur]; + (*src_cur)++; + assert(*src_cur < s->src_num); } } static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret; - int n; + int64_t ret, src_cur_offset; + int n, src_cur; - convert_select_part(s, sector_num); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); assert(s->total_sectors > sector_num); n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { BlockDriverState *file; - ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]), - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) /* Check block status of the backing file chain to avoid * needlessly reading zeroes and limiting the iteration to the * buffer size */ - ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL, - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) return n; } -static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, - uint8_t *buf) +static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf) { - int n; - int ret; + int n, ret; + QEMUIOVector qiov; + struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { BlockBackend *blk; - int64_t bs_sectors; + int src_cur; + int64_t bs_sectors, src_cur_offset; /* In the case of compression with multiple source files, we can get a * nb_sectors that spreads into the next part. So we must be able to * read across multiple BDSes for one convert_read() call. */ - convert_select_part(s, sector_num); - blk = s->src[s->src_cur]; - bs_sectors = s->src_sectors[s->src_cur]; - - n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); - ret = blk_pread(blk, - (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); + blk = s->src[src_cur]; + bs_sectors = s->src_sectors[src_cur]; + + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv( + blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, - const uint8_t *buf) + +static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf, + enum ImgConvertBlockStatus status) { int ret; + QEMUIOVector qiov; + struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; - - switch (s->status) { + switch (status) { case BLK_BACKING_FILE: /* If we have a backing file, leave clusters unallocated that are * unallocated in the source image, so that the backing file is @@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_pwrite_compressed(s->target, - sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, + BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { return ret; } @@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (!s->min_sparse || is_allocated_sectors_min(buf, n, &n, s->min_sparse)) { - ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS, 0); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_do_copy(ImgConvertState *s) +static void coroutine_fn convert_co_do_copy(void *opaque) { + ImgConvertState *s = opaque; uint8_t *buf = NULL; - int64_t sector_num, allocated_done; - int ret; - int n; + int ret, i; + int index = -1; + + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] == qemu_coroutine_self()) { + index = i; + break; + } + } + assert(index >= 0); + + s->running_coroutines++; + buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); + + while (1) { + int n; + int64_t sector_num; + enum ImgConvertBlockStatus status; + + qemu_co_mutex_lock(&s->lock); + if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { + qemu_co_mutex_unlock(&s->lock); + goto out; + } + n = convert_iteration_sectors(s, s->sector_num); + if (n < 0) { + qemu_co_mutex_unlock(&s->lock); + s->ret = n; + goto out; + } + /* save current sector and allocation status to local variables */ + sector_num = s->sector_num; + status = s->status; + if (!s->min_sparse && s->status == BLK_ZERO) { + n = MIN(n, s->buf_sectors); + } + /* increment global sector counter so that other coroutines can + * already continue reading beyond this request */ + s->sector_num += n; + qemu_co_mutex_unlock(&s->lock); + + if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) { + s->allocated_done += n; + qemu_progress_print(100.0 * s->allocated_done / + s->allocated_sectors, 0); + } + + if (status == BLK_DATA) { + ret = convert_co_read(s, sector_num, n, buf); + if (ret < 0) { + error_report("error while reading sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + } else if (!s->min_sparse && status == BLK_ZERO) { + status = BLK_DATA; + memset(buf, 0x00, n * BDRV_SECTOR_SIZE); + } + + if (s->wr_in_order) { + /* keep writes in order */ + while (s->wr_offs != sector_num) { + if (s->ret != -EINPROGRESS) { + goto out; + } + s->wait_sector_num[index] = sector_num; + qemu_coroutine_yield(); + } + s->wait_sector_num[index] = -1; + } + + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + + if (s->wr_in_order) { + /* reenter the coroutine that might have waited + * for this write to complete */ + s->wr_offs = sector_num + n; + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) { + /* + * A -> B -> A cannot occur because A has + * s->wait_sector_num[i] == -1 during A -> B. Therefore + * B will never enter A during this time window. + */ + qemu_coroutine_enter(s->co[i]); + break; + } + } + } + } + +out: + qemu_vfree(buf); + s->co[index] = NULL; + s->running_coroutines--; + if (!s->running_coroutines && s->ret == -EINPROGRESS) { + /* the convert job finished successfully */ + s->ret = 0; + } +} + +static int convert_do_copy(ImgConvertState *s) +{ + int ret, i, n; + int64_t sector_num = 0; /* Check whether we have zero initialisation or can get it efficiently */ s->has_zero_init = s->min_sparse && !s->target_has_backing @@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) { error_report("invalid cluster size"); - ret = -EINVAL; - goto fail; + return -EINVAL; } s->buf_sectors = s->cluster_sectors; } - buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); - /* Calculate allocated sectors for progress */ - s->allocated_sectors = 0; - sector_num = 0; while (sector_num < s->total_sectors) { n = convert_iteration_sectors(s, sector_num); if (n < 0) { - ret = n; - goto fail; + return n; } if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) { @@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s) } /* Do the copy */ - s->src_cur = 0; - s->src_cur_offset = 0; s->sector_next_status = 0; + s->ret = -EINPROGRESS; - sector_num = 0; - allocated_done = 0; - - while (sector_num < s->total_sectors) { - n = convert_iteration_sectors(s, sector_num); - if (n < 0) { - ret = n; - goto fail; - } - if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) - { - allocated_done += n; - qemu_progress_print(100.0 * allocated_done / s->allocated_sectors, - 0); - } - - if (s->status == BLK_DATA) { - ret = convert_read(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while reading sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - } else if (!s->min_sparse && s->status == BLK_ZERO) { - n = MIN(n, s->buf_sectors); - memset(buf, 0, n * BDRV_SECTOR_SIZE); - s->status = BLK_DATA; - } - - ret = convert_write(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } + qemu_co_mutex_init(&s->lock); + for (i = 0; i < s->num_coroutines; i++) { + s->co[i] = qemu_coroutine_create(convert_co_do_copy, s); + s->wait_sector_num[i] = -1; + qemu_coroutine_enter(s->co[i]); + } - sector_num += n; + while (s->ret == -EINPROGRESS) { + main_loop_wait(false); } - if (s->compressed) { + if (s->compressed && !s->ret) { /* signal EOF to align */ ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { - goto fail; + return ret; } } - ret = 0; -fail: - qemu_vfree(buf); - return ret; + return s->ret; } static int img_convert(int argc, char **argv) @@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv) QemuOpts *sn_opts = NULL; ImgConvertState state; bool image_opts = false; + bool wr_in_order = true; + long num_coroutines = 8; fmt = NULL; out_fmt = "raw"; @@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn", + c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; @@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv) case 'n': skip_create = 1; break; + case 'm': + if (qemu_strtol(optarg, NULL, 0, &num_coroutines) || + num_coroutines < 1 || num_coroutines > MAX_COROUTINES) { + error_report("Invalid number of coroutines. Allowed number of" + " coroutines is between 1 and %d", MAX_COROUTINES); + ret = -1; + goto fail_getopt; + } + break; + case 'W': + wr_in_order = false; + break; case OPTION_OBJECT: opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); @@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (!wr_in_order && compress) { + error_report("Out of order write and compress are mutually exclusive"); + ret = -1; + goto fail_getopt; + } + /* Initialize before goto out */ if (quiet) { progress = 0; @@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv) .min_sparse = min_sparse, .cluster_sectors = cluster_sectors, .buf_sectors = bufsectors, + .wr_in_order = wr_in_order, + .num_coroutines = num_coroutines, }; ret = convert_do_copy(&state); @@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); if (!blk) { ret = -1; goto out; diff --git a/qemu-img.texi b/qemu-img.texi index 174aae3..c81db3e 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -137,6 +137,12 @@ Parameters to convert subcommand: @item -n Skip the creation of the target volume +@item -m +Number of parallel coroutines for the convert process +@item -W +Allow out-of-order writes to the destination. This option improves performance, +but is only recommended for preallocated devices like host devices or other +raw block devices. @end table Parameters to dd subcommand: @@ -296,7 +302,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated) to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +Out of order writes can be enabled with @code{-W} to improve performance. +This is only recommended for preallocated devices like host devices or other +raw block devices. Out of order write does not work in combination with +creating compressed images. + +@var{num_coroutines} specifies how many coroutines work in parallel during +the convert process (defaults to 8). + @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} Dd copies from @var{input} file to @var{output} file converting it from diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 7ac1576..2c48f9c 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, } return 0; } + + /* Request additional permissions if necessary for this command. The caller + * is responsible for restoring the original permissions afterwards if this + * is what it wants. */ + if (ct->perm && blk_is_available(blk)) { + uint64_t orig_perm, orig_shared_perm; + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + + if (ct->perm & ~orig_perm) { + uint64_t new_perm; + Error *local_err = NULL; + int ret; + + new_perm = orig_perm | ct->perm; + + ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err); + if (ret < 0) { + error_report_err(local_err); + return 0; + } + } + } + optind = 0; return ct->cfunc(blk, argc, argv); } @@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = { .name = "write", .altname = "w", .cfunc = write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-bcCfquz] [-P pattern] off len", @@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", .cfunc = writev_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfq] [-P pattern] off len [len..]", @@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", .cfunc = aio_write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfiquz] [-P pattern] off len [len..]", @@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = { .name = "truncate", .altname = "t", .cfunc = truncate_f, + .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE, .argmin = 1, .argmax = 1, .args = "off", @@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = { .name = "discard", .altname = "d", .cfunc = discard_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cq] off len", @@ -240,6 +240,7 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr, va_start(ap, fmt); buffer = g_strdup_vprintf(fmt, ap); qtest_send(chr, buffer); + g_free(buffer); va_end(ap); } diff --git a/roms/openbios b/roms/openbios -Subproject ef8a14e8afb47635c9c5f7524a52c3251827e29 +Subproject 0cd97cc904e71fbb461112f6756934ec6af890b diff --git a/roms/seabios b/roms/seabios -Subproject 8891697e3f7d84355420573efd98e94f1473676 +Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79 diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 72cf1fb..6a370a8 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -75,7 +75,13 @@ for arch in $ARCHLIST; do continue fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install + if [ "$arch" = x86 ]; then + arch_var=SRCARCH + else + arch_var=ARCH + fi + + make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" @@ -92,6 +98,11 @@ for arch in $ARCHLIST; do cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/" cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" fi + if [ $arch = arm ]; then + cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi if [ $arch = x86 ]; then cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/" cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py index 14a27e7..bcef7ee 100755 --- a/scripts/vmstate-static-checker.py +++ b/scripts/vmstate-static-checker.py @@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field): 'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj', 'br.dev.exp.aer_log', 'parent_obj.parent_obj.exp.aer_log'], + 'spapr_pci': ['dma_liobn[0]', 'mig_liobn', + 'mem_win_addr', 'mig_mem_win_addr', + 'mem_win_size', 'mig_mem_win_size', + 'io_win_addr', 'mig_io_win_addr', + 'io_win_size', 'mig_io_win_size'], } if not name in changed_names: diff --git a/stubs/vmstate.c b/stubs/vmstate.c index bbe158f..6d52f29 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "migration/vmstate.h" +#include "migration/migration.h" const VMStateDescription vmstate_dummy = {}; @@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev, void *opaque) { } + +int check_migratable(Object *obj, Error **err) +{ + return 0; +} diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 9e7b2df..25ceaab 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -521,6 +521,8 @@ typedef struct CPUARMState { void *nvic; const struct arm_boot_info *boot_info; + /* Store GICv3CPUState to access from this struct */ + void *gicv3state; } CPUARMState; /** diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h index 8cd607e..c2205e6 100644 --- a/target/i386/cpu-qom.h +++ b/target/i386/cpu-qom.h @@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition; * X86CPUClass: * @cpu_def: CPU model definition * @kvm_required: Whether CPU model requires KVM to be enabled. + * @ordering: Ordering on the "-cpu help" CPU model list. * @migration_safe: See CpuDefinitionInfo::migration_safe + * @static_model: See CpuDefinitionInfo::static * @parent_realize: The parent class' realize handler. * @parent_reset: The parent class' reset handler. * @@ -59,11 +61,15 @@ typedef struct X86CPUClass { CPUClass parent_class; /*< public >*/ - /* Should be eventually replaced by subclass-specific property defaults. */ + /* CPU definition, automatically loaded by instance_init if not NULL. + * Should be eventually replaced by subclass-specific property defaults. + */ X86CPUDefinition *cpu_def; bool kvm_required; + int ordering; bool migration_safe; + bool static_model; /* Optional description of CPU model. * If unavailable, cpu_def->model_id is used */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b6f157d..89421c8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -29,10 +29,16 @@ #include "qemu/option.h" #include "qemu/config-file.h" #include "qapi/qmp/qerror.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qbool.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qfloat.h" #include "qapi-types.h" #include "qapi-visit.h" #include "qapi/visitor.h" +#include "qom/qom-qobject.h" #include "sysemu/arch_init.h" #if defined(CONFIG_KVM) @@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); -#ifdef CONFIG_KVM - static bool lmce_supported(void) { - uint64_t mce_cap; + uint64_t mce_cap = 0; +#ifdef CONFIG_KVM if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) { return false; } +#endif return !!(mce_cap & MCG_LMCE_P); } @@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str) return 0; } -static X86CPUDefinition host_cpudef; - -static Property host_x86_cpu_properties[] = { +static Property max_x86_cpu_properties[] = { DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true), DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false), DEFINE_PROP_END_OF_LIST() }; -/* class_init for the "host" CPU model - * - * This function may be called before KVM is initialized. - */ -static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +static void max_x86_cpu_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); X86CPUClass *xcc = X86_CPU_CLASS(oc); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - - xcc->kvm_required = true; - - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); - - host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); - host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); - host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); - host_cpudef.stepping = eax & 0x0F; - cpu_x86_fill_model_id(host_cpudef.model_id); + xcc->ordering = 9; - xcc->cpu_def = &host_cpudef; xcc->model_description = - "KVM processor with all supported host features " - "(only available in KVM mode)"; - - /* level, xlevel, xlevel2, and the feature words are initialized on - * instance_init, because they require KVM to be initialized. - */ + "Enables all features supported by the accelerator in the current host"; - dc->props = host_x86_cpu_properties; - /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */ - dc->cannot_destroy_with_object_finalize_yet = true; + dc->props = max_x86_cpu_properties; } -static void host_x86_cpu_initfn(Object *obj) +static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp); + +static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); CPUX86State *env = &cpu->env; @@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj) /* We can't fill the features array here because we don't know yet if * "migratable" is true or false. */ - cpu->host_features = true; + cpu->max_features = true; - /* If KVM is disabled, x86_cpu_realizefn() will report an error later */ if (kvm_enabled()) { + X86CPUDefinition host_cpudef = { }; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + + host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); + + host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); + host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + host_cpudef.stepping = eax & 0x0F; + + cpu_x86_fill_model_id(host_cpudef.model_id); + + x86_cpu_load_def(cpu, &host_cpudef, &error_abort); + env->cpuid_min_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj) if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort); } + } else { + object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD, + "vendor", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "family", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "model", &error_abort); + object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort); + object_property_set_str(OBJECT(cpu), + "QEMU TCG CPU version " QEMU_HW_VERSION, + "model-id", &error_abort); } object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort); } +static const TypeInfo max_x86_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("max"), + .parent = TYPE_X86_CPU, + .instance_init = max_x86_cpu_initfn, + .class_init = max_x86_cpu_class_init, +}; + +#ifdef CONFIG_KVM + +static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->kvm_required = true; + xcc->ordering = 8; + + xcc->model_description = + "KVM processor with all supported host features " + "(only available in KVM mode)"; +} + static const TypeInfo host_x86_cpu_type_info = { .name = X86_CPU_TYPE_NAME("host"), - .parent = TYPE_X86_CPU, - .instance_init = host_x86_cpu_initfn, + .parent = X86_CPU_TYPE_NAME("max"), .class_init = host_x86_cpu_class_init, }; @@ -2060,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_load_features(X86CPU *cpu, Error **errp); +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); static int x86_cpu_filter_features(X86CPU *cpu); /* Check for missing features that may prevent the CPU class from @@ -2083,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); - x86_cpu_load_features(xc, &err); + x86_cpu_expand_features(xc, &err); if (err) { - /* Errors at x86_cpu_load_features should never happen, + /* Errors at x86_cpu_expand_features should never happen, * but in case it does, just report the model as not * runnable at all using the "type" property. */ @@ -2128,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset) } } -/* Sort alphabetically by type name, listing kvm_required models last. */ +/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) { ObjectClass *class_a = (ObjectClass *)a; @@ -2137,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) X86CPUClass *cc_b = X86_CPU_CLASS(class_b); const char *name_a, *name_b; - if (cc_a->kvm_required != cc_b->kvm_required) { - /* kvm_required items go last */ - return cc_a->kvm_required ? 1 : -1; + if (cc_a->ordering != cc_b->ordering) { + return cc_a->ordering - cc_b->ordering; } else { name_a = object_class_get_name(class_a); name_b = object_class_get_name(class_b); @@ -2161,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) CPUListState *s = user_data; char *name = x86_cpu_class_get_model_name(cc); const char *desc = cc->model_description; - if (!desc) { + if (!desc && cc->cpu_def) { desc = cc->cpu_def->model_id; } @@ -2210,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data) info->q_typename = g_strdup(object_class_get_name(oc)); info->migration_safe = cc->migration_safe; info->has_migration_safe = true; + info->q_static = cc->static_model; entry = g_malloc0(sizeof(*entry)); entry->value = info; @@ -2247,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } -/* - * Filters CPU feature words based on host availability of each feature. - * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. - */ -static int x86_cpu_filter_features(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - FeatureWord w; - int rv = 0; - - for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = - x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; - env->features[w] &= host_feat; - cpu->filtered_features[w] = requested_features & ~env->features[w]; - if (cpu->filtered_features[w]) { - rv = 1; - } - } - - return rv; -} - static void x86_cpu_report_filtered_features(X86CPU *cpu) { FeatureWord w; @@ -2293,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) } } -/* Load data from X86CPUDefinition +/* Load data from X86CPUDefinition into a X86CPU object */ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) { @@ -2302,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) char host_vendor[CPUID_VENDOR_SZ + 1]; FeatureWord w; + /*NOTE: any property set by this function should be returned by + * x86_cpu_static_props(), so static expansion of + * query-cpu-model-expansion is always complete. + */ + /* CPU models only set _minimum_ values for level/xlevel: */ object_property_set_int(OBJECT(cpu), def->level, "min-level", errp); object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp); @@ -2346,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) } +/* Return a QDict containing keys for all properties that can be included + * in static expansion of CPU models. All properties set by x86_cpu_load_def() + * must be included in the dictionary. + */ +static QDict *x86_cpu_static_props(void) +{ + FeatureWord w; + int i; + static const char *props[] = { + "min-level", + "min-xlevel", + "family", + "model", + "stepping", + "model-id", + "vendor", + "lmce", + NULL, + }; + static QDict *d; + + if (d) { + return d; + } + + d = qdict_new(); + for (i = 0; props[i]; i++) { + qdict_put_obj(d, props[i], qnull()); + } + + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *fi = &feature_word_info[w]; + int bit; + for (bit = 0; bit < 32; bit++) { + if (!fi->feat_names[bit]) { + continue; + } + qdict_put_obj(d, fi->feat_names[bit], qnull()); + } + } + + return d; +} + +/* Add an entry to @props dict, with the value for property. */ +static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop) +{ + QObject *value = object_property_get_qobject(OBJECT(cpu), prop, + &error_abort); + + qdict_put_obj(props, prop, value); +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model. + */ +static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) +{ + QDict *sprops = x86_cpu_static_props(); + const QDictEntry *e; + + for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) { + const char *prop = qdict_entry_key(e); + x86_cpu_expand_prop(cpu, props, prop); + } +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model, including every + * writeable QOM property. + */ +static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) +{ + ObjectPropertyIterator iter; + ObjectProperty *prop; + + object_property_iter_init(&iter, OBJECT(cpu)); + while ((prop = object_property_iter_next(&iter))) { + /* skip read-only or write-only properties */ + if (!prop->get || !prop->set) { + continue; + } + + /* "hotplugged" is the only property that is configurable + * on the command-line but will be set differently on CPUs + * created using "-cpu ... -smp ..." and by CPUs created + * on the fly by x86_cpu_from_model() for querying. Skip it. + */ + if (!strcmp(prop->name, "hotplugged")) { + continue; + } + x86_cpu_expand_prop(cpu, props, prop->name); + } +} + +static void object_apply_props(Object *obj, QDict *props, Error **errp) +{ + const QDictEntry *prop; + Error *err = NULL; + + for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) { + object_property_set_qobject(obj, qdict_entry_value(prop), + qdict_entry_key(prop), &err); + if (err) { + break; + } + } + + error_propagate(errp, err); +} + +/* Create X86CPU object according to model+props specification */ +static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp) +{ + X86CPU *xc = NULL; + X86CPUClass *xcc; + Error *err = NULL; + + xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model)); + if (xcc == NULL) { + error_setg(&err, "CPU model '%s' not found", model); + goto out; + } + + xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); + if (props) { + object_apply_props(OBJECT(xc), props, &err); + if (err) { + goto out; + } + } + + x86_cpu_expand_features(xc, &err); + if (err) { + goto out; + } + +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(xc)); + xc = NULL; + } + return xc; +} + +CpuModelExpansionInfo * +arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + X86CPU *xc = NULL; + Error *err = NULL; + CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1); + QDict *props = NULL; + const char *base_name; + + xc = x86_cpu_from_model(model->name, + model->has_props ? + qobject_to_qdict(model->props) : + NULL, &err); + if (err) { + goto out; + } + + props = qdict_new(); + + switch (type) { + case CPU_MODEL_EXPANSION_TYPE_STATIC: + /* Static expansion will be based on "base" only */ + base_name = "base"; + x86_cpu_to_dict(xc, props); + break; + case CPU_MODEL_EXPANSION_TYPE_FULL: + /* As we don't return every single property, full expansion needs + * to keep the original model name+props, and add extra + * properties on top of that. + */ + base_name = model->name; + x86_cpu_to_dict_full(xc, props); + break; + default: + error_setg(&err, "Unsupportted expansion type"); + goto out; + } + + if (!props) { + props = qdict_new(); + } + x86_cpu_to_dict(xc, props); + + ret->model = g_new0(CpuModelInfo, 1); + ret->model->name = g_strdup(base_name); + ret->model->props = QOBJECT(props); + ret->model->has_props = true; + +out: + object_unref(OBJECT(xc)); + if (err) { + error_propagate(errp, err); + qapi_free_CpuModelExpansionInfo(ret); + ret = NULL; + } + return ret; +} + X86CPU *cpu_x86_init(const char *cpu_model) { return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model)); @@ -3095,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } -/* Load CPUID data based on configured features */ -static void x86_cpu_load_features(X86CPU *cpu, Error **errp) +/***** Steps involved on loading and filtering CPUID data + * + * When initializing and realizing a CPU object, the steps + * involved in setting up CPUID data are: + * + * 1) Loading CPU model definition (X86CPUDefinition). This is + * implemented by x86_cpu_load_def() and should be completely + * transparent, as it is done automatically by instance_init. + * No code should need to look at X86CPUDefinition structs + * outside instance_init. + * + * 2) CPU expansion. This is done by realize before CPUID + * filtering, and will make sure host/accelerator data is + * loaded for CPU models that depend on host capabilities + * (e.g. "host"). Done by x86_cpu_expand_features(). + * + * 3) CPUID filtering. This initializes extra data related to + * CPUID, and checks if the host supports all capabilities + * required by the CPU. Runnability of a CPU model is + * determined at this step. Done by x86_cpu_filter_features(). + * + * Some operations don't require all steps to be performed. + * More precisely: + * + * - CPU instance creation (instance_init) will run only CPU + * model loading. CPU expansion can't run at instance_init-time + * because host/accelerator data may be not available yet. + * - CPU realization will perform both CPU model expansion and CPUID + * filtering, and return an error in case one of them fails. + * - query-cpu-definitions needs to run all 3 steps. It needs + * to run CPUID filtering, as the 'unavailable-features' + * field is set based on the filtering results. + * - The query-cpu-model-expansion QMP command only needs to run + * CPU model loading and CPU expansion. It should not filter + * any CPUID data based on host capabilities. + */ + +/* Expand CPU configuration data, based on configured features + * and host/accelerator capabilities when appropriate. + */ +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = &cpu->env; FeatureWord w; GList *l; Error *local_err = NULL; - /*TODO: cpu->host_features incorrectly overwrites features + /*TODO: cpu->max_features incorrectly overwrites features * set using "feat=on|off". Once we fix this, we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->host_features) { + if (cpu->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { env->features[w] = x86_cpu_get_supported_feature_word(w, cpu->migratable); @@ -3173,6 +3424,32 @@ out: } } +/* + * Finishes initialization of CPUID data, filters CPU feature + * words based on host availability of each feature. + * + * Returns: 0 if all flags are supported by the host, non-zero otherwise. + */ +static int x86_cpu_filter_features(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + FeatureWord w; + int rv = 0; + + for (w = 0; w < FEATURE_WORDS; w++) { + uint32_t host_feat = + x86_cpu_get_supported_feature_word(w, false); + uint32_t requested_features = env->features[w]; + env->features[w] &= host_feat; + cpu->filtered_features[w] = requested_features & ~env->features[w]; + if (cpu->filtered_features[w]) { + rv = 1; + } + } + + return rv; +} + #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) @@ -3200,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) return; } - x86_cpu_load_features(cpu, &local_err); + x86_cpu_expand_features(cpu, &local_err); if (local_err) { goto out; } @@ -3619,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj) object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort); object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort); - x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + if (xcc->cpu_def) { + x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + } } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -3774,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = { .class_init = x86_cpu_common_class_init, }; + +/* "base" CPU model, used by query-cpu-model-expansion */ +static void x86_cpu_base_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->static_model = true; + xcc->migration_safe = true; + xcc->model_description = "base CPU model type with no features enabled"; + xcc->ordering = 8; +} + +static const TypeInfo x86_base_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("base"), + .parent = TYPE_X86_CPU, + .class_init = x86_cpu_base_class_init, +}; + static void x86_cpu_register_types(void) { int i; @@ -3782,6 +4079,8 @@ static void x86_cpu_register_types(void) for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { x86_register_cpudef_type(&builtin_x86_defs[i]); } + type_register_static(&max_x86_cpu_type_info); + type_register_static(&x86_base_cpu_type_info); #ifdef CONFIG_KVM type_register_static(&host_x86_cpu_type_info); #endif diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 573f2aa..12a39d5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1211,7 +1211,7 @@ struct X86CPU { bool enforce_cpuid; bool expose_kvm; bool migratable; - bool host_features; + bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs index a8c7a30..0057b31 100644 --- a/target/ppc/Makefile.objs +++ b/target/ppc/Makefile.objs @@ -1,8 +1,9 @@ obj-y += cpu-models.o +obj-y += cpu.o obj-y += translate.o ifeq ($(CONFIG_SOFTMMU),y) -obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o -obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o compat.o +obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o +obj-$(TARGET_PPC64) += mmu-hash64.o compat.o endif obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index 40282a1..28d9cc7 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -1,5 +1,5 @@ /* - * writing ELF notes for ppc64 arch + * writing ELF notes for ppc{64,} arch * * * Copyright IBM, Corp. 2013 @@ -19,36 +19,48 @@ #include "sysemu/dump.h" #include "sysemu/kvm.h" -struct PPC64UserRegStruct { - uint64_t gpr[32]; - uint64_t nip; - uint64_t msr; - uint64_t orig_gpr3; - uint64_t ctr; - uint64_t link; - uint64_t xer; - uint64_t ccr; - uint64_t softe; - uint64_t trap; - uint64_t dar; - uint64_t dsisr; - uint64_t result; +#ifdef TARGET_PPC64 +#define ELFCLASS ELFCLASS64 +#define cpu_to_dump_reg cpu_to_dump64 +typedef uint64_t reg_t; +typedef Elf64_Nhdr Elf_Nhdr; +#else +#define ELFCLASS ELFCLASS32 +#define cpu_to_dump_reg cpu_to_dump32 +typedef uint32_t reg_t; +typedef Elf32_Nhdr Elf_Nhdr; +#endif /* TARGET_PPC64 */ + +struct PPCUserRegStruct { + reg_t gpr[32]; + reg_t nip; + reg_t msr; + reg_t orig_gpr3; + reg_t ctr; + reg_t link; + reg_t xer; + reg_t ccr; + reg_t softe; + reg_t trap; + reg_t dar; + reg_t dsisr; + reg_t result; } QEMU_PACKED; -struct PPC64ElfPrstatus { +struct PPCElfPrstatus { char pad1[112]; - struct PPC64UserRegStruct pr_reg; - uint64_t pad2[4]; + struct PPCUserRegStruct pr_reg; + reg_t pad2[4]; } QEMU_PACKED; -struct PPC64ElfFpregset { +struct PPCElfFpregset { uint64_t fpr[32]; - uint64_t fpscr; + reg_t fpscr; } QEMU_PACKED; -struct PPC64ElfVmxregset { +struct PPCElfVmxregset { ppc_avr_t avr[32]; ppc_avr_t vscr; union { @@ -57,26 +69,26 @@ struct PPC64ElfVmxregset { } vrsave; } QEMU_PACKED; -struct PPC64ElfVsxregset { +struct PPCElfVsxregset { uint64_t vsr[32]; } QEMU_PACKED; -struct PPC64ElfSperegset { +struct PPCElfSperegset { uint32_t evr[32]; uint64_t spe_acc; uint32_t spe_fscr; } QEMU_PACKED; typedef struct noteStruct { - Elf64_Nhdr hdr; + Elf_Nhdr hdr; char name[5]; char pad3[3]; union { - struct PPC64ElfPrstatus prstatus; - struct PPC64ElfFpregset fpregset; - struct PPC64ElfVmxregset vmxregset; - struct PPC64ElfVsxregset vsxregset; - struct PPC64ElfSperegset speregset; + struct PPCElfPrstatus prstatus; + struct PPCElfFpregset fpregset; + struct PPCElfVmxregset vmxregset; + struct PPCElfVsxregset vsxregset; + struct PPCElfSperegset speregset; } contents; } QEMU_PACKED Note; @@ -85,12 +97,12 @@ typedef struct NoteFuncArg { DumpState *state; } NoteFuncArg; -static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - uint64_t cr; - struct PPC64ElfPrstatus *prstatus; - struct PPC64UserRegStruct *reg; + reg_t cr; + struct PPCElfPrstatus *prstatus; + struct PPCUserRegStruct *reg; Note *note = &arg->note; DumpState *s = arg->state; @@ -101,25 +113,25 @@ static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) reg = &prstatus->pr_reg; for (i = 0; i < 32; i++) { - reg->gpr[i] = cpu_to_dump64(s, cpu->env.gpr[i]); + reg->gpr[i] = cpu_to_dump_reg(s, cpu->env.gpr[i]); } - reg->nip = cpu_to_dump64(s, cpu->env.nip); - reg->msr = cpu_to_dump64(s, cpu->env.msr); - reg->ctr = cpu_to_dump64(s, cpu->env.ctr); - reg->link = cpu_to_dump64(s, cpu->env.lr); - reg->xer = cpu_to_dump64(s, cpu_read_xer(&cpu->env)); + reg->nip = cpu_to_dump_reg(s, cpu->env.nip); + reg->msr = cpu_to_dump_reg(s, cpu->env.msr); + reg->ctr = cpu_to_dump_reg(s, cpu->env.ctr); + reg->link = cpu_to_dump_reg(s, cpu->env.lr); + reg->xer = cpu_to_dump_reg(s, cpu_read_xer(&cpu->env)); cr = 0; for (i = 0; i < 8; i++) { cr |= (cpu->env.crf[i] & 15) << (4 * (7 - i)); } - reg->ccr = cpu_to_dump64(s, cr); + reg->ccr = cpu_to_dump_reg(s, cr); } -static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfFpregset *fpregset; + struct PPCElfFpregset *fpregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -131,13 +143,13 @@ static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) for (i = 0; i < 32; i++) { fpregset->fpr[i] = cpu_to_dump64(s, cpu->env.fpr[i]); } - fpregset->fpscr = cpu_to_dump64(s, cpu->env.fpscr); + fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr); } -static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfVmxregset *vmxregset; + struct PPCElfVmxregset *vmxregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -164,10 +176,11 @@ static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) } vmxregset->vscr.u32[3] = cpu_to_dump32(s, cpu->env.vscr); } -static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) + +static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfVsxregset *vsxregset; + struct PPCElfVsxregset *vsxregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -179,9 +192,10 @@ static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) vsxregset->vsr[i] = cpu_to_dump64(s, cpu->env.vsr[i]); } } -static void ppc64_write_elf64_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) + +static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) { - struct PPC64ElfSperegset *speregset; + struct PPCElfSperegset *speregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -197,11 +211,11 @@ static const struct NoteFuncDescStruct { int contents_size; void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu); } note_func[] = { - {sizeof(((Note *)0)->contents.prstatus), ppc64_write_elf64_prstatus}, - {sizeof(((Note *)0)->contents.fpregset), ppc64_write_elf64_fpregset}, - {sizeof(((Note *)0)->contents.vmxregset), ppc64_write_elf64_vmxregset}, - {sizeof(((Note *)0)->contents.vsxregset), ppc64_write_elf64_vsxregset}, - {sizeof(((Note *)0)->contents.speregset), ppc64_write_elf64_speregset}, + {sizeof(((Note *)0)->contents.prstatus), ppc_write_elf_prstatus}, + {sizeof(((Note *)0)->contents.fpregset), ppc_write_elf_fpregset}, + {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset}, + {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset}, + {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset}, { 0, NULL} }; @@ -213,8 +227,9 @@ int cpu_get_dump_info(ArchDumpInfo *info, PowerPCCPU *cpu = POWERPC_CPU(first_cpu); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - info->d_machine = EM_PPC64; - info->d_class = ELFCLASS64; + info->d_machine = PPC_ELF_MACHINE; + info->d_class = ELFCLASS; + if ((*pcc->interrupts_big_endian)(cpu)) { info->d_endian = ELFDATA2MSB; } else { @@ -236,25 +251,19 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) int note_head_size; const NoteFuncDesc *nf; - if (class != ELFCLASS64) { - return -1; - } - assert(machine == EM_PPC64); - - note_head_size = sizeof(Elf64_Nhdr); - + note_head_size = sizeof(Elf_Nhdr); for (nf = note_func; nf->note_contents_func; nf++) { elf_note_size = elf_note_size + note_head_size + name_size + - nf->contents_size; + nf->contents_size; } return (elf_note_size) * nr_cpus; } -static int ppc64_write_all_elf64_notes(const char *note_name, - WriteCoreDumpFunction f, - PowerPCCPU *cpu, int id, - void *opaque) +static int ppc_write_all_elf_notes(const char *note_name, + WriteCoreDumpFunction f, + PowerPCCPU *cpu, int id, + void *opaque) { NoteFuncArg arg = { .state = opaque }; int ret = -1; @@ -282,5 +291,12 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque) { PowerPCCPU *cpu = POWERPC_CPU(cs); - return ppc64_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); +} + +int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); } diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c new file mode 100644 index 0000000..2801166 --- /dev/null +++ b/target/ppc/cpu.c @@ -0,0 +1,47 @@ +/* + * PowerPC CPU routines for qemu. + * + * Copyright (c) 2017 Nikunj A Dadhania, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu-models.h" + +target_ulong cpu_read_xer(CPUPPCState *env) +{ + if (is_isa300(env)) { + return env->xer | (env->so << XER_SO) | + (env->ov << XER_OV) | (env->ca << XER_CA) | + (env->ov32 << XER_OV32) | (env->ca32 << XER_CA32); + } + + return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | + (env->ca << XER_CA); +} + +void cpu_write_xer(CPUPPCState *env, target_ulong xer) +{ + env->so = (xer >> XER_SO) & 1; + env->ov = (xer >> XER_OV) & 1; + env->ca = (xer >> XER_CA) & 1; + /* write all the flags, while reading back check of isa300 */ + env->ov32 = (xer >> XER_OV32) & 1; + env->ca32 = (xer >> XER_CA32) & 1; + env->xer = xer & ~((1ul << XER_SO) | + (1ul << XER_OV) | (1ul << XER_CA) | + (1ul << XER_OV32) | (1ul << XER_CA32)); +} diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 425e79d..d33c17e 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -223,11 +223,12 @@ enum { typedef struct opc_handler_t opc_handler_t; /*****************************************************************************/ -/* Types used to describe some PowerPC registers */ +/* Types used to describe some PowerPC registers etc. */ typedef struct DisasContext DisasContext; typedef struct ppc_spr_t ppc_spr_t; typedef union ppc_avr_t ppc_avr_t; typedef union ppc_tlb_t ppc_tlb_t; +typedef struct ppc_hash_pte64 ppc_hash_pte64_t; /* SPR access micro-ops generations callbacks */ struct ppc_spr_t { @@ -305,14 +306,6 @@ union ppc_tlb_t { #define TLB_MAS 3 #endif -#define SDR_32_HTABORG 0xFFFF0000UL -#define SDR_32_HTABMASK 0x000001FFUL - -#if defined(TARGET_PPC64) -#define SDR_64_HTABORG 0xFFFFFFFFFFFC0000ULL -#define SDR_64_HTABSIZE 0x000000000000001FULL -#endif /* defined(TARGET_PPC64 */ - typedef struct ppc_slb_t ppc_slb_t; struct ppc_slb_t { uint64_t esid; @@ -965,6 +958,8 @@ struct CPUPPCState { target_ulong so; target_ulong ov; target_ulong ca; + target_ulong ov32; + target_ulong ca32; /* Reservation address */ target_ulong reserve_addr; /* Reservation value */ @@ -1005,12 +1000,7 @@ struct CPUPPCState { /* tcg TLB needs flush (deferred slb inval instruction typically) */ #endif /* segment registers */ - hwaddr htab_base; - /* mask used to normalize hash value to PTEG index */ - hwaddr htab_mask; target_ulong sr[32]; - /* externally stored hash table */ - uint8_t *external_htab; /* BATs */ uint32_t nb_BATs; target_ulong DBAT[2][8]; @@ -1218,6 +1208,14 @@ struct PPCVirtualHypervisor { struct PPCVirtualHypervisorClass { InterfaceClass parent; void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu); + hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp); + const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n); + void (*unmap_hptes)(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n); + void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte0, uint64_t pte1); }; #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor" @@ -1243,6 +1241,8 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque); +int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque); #ifndef CONFIG_USER_ONLY void ppc_cpu_do_system_reset(CPUState *cs); extern const struct VMStateDescription vmstate_ppc_cpu; @@ -1300,8 +1300,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val); void store_booke_tsr (CPUPPCState *env, target_ulong val); void ppc_tlb_invalidate_all (CPUPPCState *env); void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr); -void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); -void cpu_ppc_set_papr(PowerPCCPU *cpu); +void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); #endif #endif @@ -1372,11 +1371,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu); #define XER_SO 31 #define XER_OV 30 #define XER_CA 29 +#define XER_OV32 19 +#define XER_CA32 18 #define XER_CMP 8 #define XER_BC 0 #define xer_so (env->so) #define xer_ov (env->ov) #define xer_ca (env->ca) +#define xer_ov32 (env->ov) +#define xer_ca32 (env->ca) #define xer_cmp ((env->xer >> XER_CMP) & 0xFF) #define xer_bc ((env->xer >> XER_BC) & 0x7F) @@ -2343,18 +2346,9 @@ enum { /*****************************************************************************/ -static inline target_ulong cpu_read_xer(CPUPPCState *env) -{ - return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA); -} - -static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer) -{ - env->so = (xer >> XER_SO) & 1; - env->ov = (xer >> XER_OV) & 1; - env->ca = (xer >> XER_CA) & 1; - env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)); -} +#define is_isa300(ctx) (!!(ctx->insns_flags2 & PPC2_ISA300)) +target_ulong cpu_read_xer(CPUPPCState *env); +void cpu_write_xer(CPUPPCState *env, target_ulong xer); static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dd0a892..da4e1a6 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -28,6 +28,15 @@ /*****************************************************************************/ /* Fixed point operations helpers */ +static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) +{ + if (unlikely(ov)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } +} + target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { @@ -49,11 +58,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return (target_ulong)rt; @@ -81,11 +86,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return (target_ulong)rt; @@ -105,11 +106,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return rt; @@ -127,12 +124,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) } if (oe) { - - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return rt; diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 52bbea5..acc40ec 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1251,7 +1251,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu) return ret; } - if (!env->external_htab) { + if (!cpu->vhyp) { ppc_store_sdr1(env, sregs.u.s.sdr1); } @@ -2596,89 +2596,85 @@ void kvm_arch_init_irq_routing(KVMState *s) { } -struct kvm_get_htab_buf { - struct kvm_get_htab_header header; - /* - * We require one extra byte for read - */ - target_ulong hpte[(HPTES_PER_GROUP * 2) + 1]; -}; - -uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index) +void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) { - int htab_fd; - struct kvm_get_htab_fd ghf; - struct kvm_get_htab_buf *hpte_buf; + struct kvm_get_htab_fd ghf = { + .flags = 0, + .start_index = ptex, + }; + int fd, rc; + int i; - ghf.flags = 0; - ghf.start_index = pte_index; - htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (htab_fd < 0) { - goto error_out; + fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); + if (fd < 0) { + hw_error("kvmppc_read_hptes: Unable to open HPT fd"); } - hpte_buf = g_malloc0(sizeof(*hpte_buf)); - /* - * Read the hpte group - */ - if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) { - goto out_close; - } + i = 0; + while (i < n) { + struct kvm_get_htab_header *hdr; + int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; + char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; - close(htab_fd); - return (uint64_t)(uintptr_t) hpte_buf->hpte; + rc = read(fd, buf, sizeof(buf)); + if (rc < 0) { + hw_error("kvmppc_read_hptes: Unable to read HPTEs"); + } -out_close: - g_free(hpte_buf); - close(htab_fd); -error_out: - return 0; -} + hdr = (struct kvm_get_htab_header *)buf; + while ((i < n) && ((char *)hdr < (buf + rc))) { + int invalid = hdr->n_invalid; -void kvmppc_hash64_free_pteg(uint64_t token) -{ - struct kvm_get_htab_buf *htab_buf; + if (hdr->index != (ptex + i)) { + hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 + " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); + } + + memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); + i += hdr->n_valid; - htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf, - hpte); - g_free(htab_buf); - return; + if ((n - i) < invalid) { + invalid = n - i; + } + memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); + i += hdr->n_invalid; + + hdr = (struct kvm_get_htab_header *) + ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); + } + } + + close(fd); } -void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) { - int htab_fd; + int fd, rc; struct kvm_get_htab_fd ghf; - struct kvm_get_htab_buf hpte_buf; + struct { + struct kvm_get_htab_header hdr; + uint64_t pte0; + uint64_t pte1; + } buf; ghf.flags = 0; ghf.start_index = 0; /* Ignored */ - htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (htab_fd < 0) { - goto error_out; - } - - hpte_buf.header.n_valid = 1; - hpte_buf.header.n_invalid = 0; - hpte_buf.header.index = pte_index; - hpte_buf.hpte[0] = pte0; - hpte_buf.hpte[1] = pte1; - /* - * Write the hpte entry. - * CAUTION: write() has the warn_unused_result attribute. Hence we - * need to check the return value, even though we do nothing. - */ - if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { - goto out_close; + fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); + if (fd < 0) { + hw_error("kvmppc_write_hpte: Unable to open HPT fd"); } -out_close: - close(htab_fd); - return; + buf.hdr.n_valid = 1; + buf.hdr.n_invalid = 0; + buf.hdr.index = ptex; + buf.pte0 = cpu_to_be64(pte0); + buf.pte1 = cpu_to_be64(pte1); -error_out: - return; + rc = write(fd, &buf, sizeof(buf)); + if (rc != sizeof(buf)) { + hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); + } + close(fd); } int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 8da2ee4..8e9f42d 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -49,11 +49,8 @@ int kvmppc_get_htab_fd(bool write); int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid); -uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index); -void kvmppc_hash64_free_pteg(uint64_t token); - -void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, - target_ulong pte0, target_ulong pte1); +void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n); +void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1); bool kvmppc_has_cap_fixup_hcalls(void); bool kvmppc_has_cap_htm(void); int kvmppc_enable_hwrng(void); @@ -234,20 +231,13 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, abort(); } -static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, - target_ulong pte_index) -{ - abort(); -} - -static inline void kvmppc_hash64_free_pteg(uint64_t token) +static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) { abort(); } -static inline void kvmppc_hash64_write_pte(CPUPPCState *env, - target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) { abort(); } diff --git a/target/ppc/machine.c b/target/ppc/machine.c index df9f7a4..6cb3a48 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -76,7 +76,7 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) qemu_get_betls(f, &env->pb[i]); for (i = 0; i < 1024; i++) qemu_get_betls(f, &env->spr[i]); - if (!env->external_htab) { + if (!cpu->vhyp) { ppc_store_sdr1(env, sdr1); } qemu_get_be32s(f, &env->vscr); @@ -228,8 +228,7 @@ static int cpu_post_load(void *opaque, int version_id) env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1]; } - if (!env->external_htab) { - /* Restore htab_base and htab_mask variables */ + if (!cpu->vhyp) { ppc_store_sdr1(env, env->spr[SPR_SDR1]); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index ab432ba..fa573dd 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -82,11 +82,9 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val) { PowerPCCPU *cpu = ppc_env_get_cpu(env); - if (!env->external_htab) { - if (env->spr[SPR_SDR1] != val) { - ppc_store_sdr1(env, val); - tlb_flush(CPU(cpu)); - } + if (env->spr[SPR_SDR1] != val) { + ppc_store_sdr1(env, val); + tlb_flush(CPU(cpu)); } } diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 29bace6..03ae3c1 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -304,9 +304,9 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) { - CPUPPCState *env = &cpu->env; + target_ulong mask = ppc_hash32_hpt_mask(cpu); - return (hash * HASH_PTEG_SIZE_32) & env->htab_mask; + return (hash * HASH_PTEG_SIZE_32) & mask; } static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, @@ -339,7 +339,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu, target_ulong sr, target_ulong eaddr, ppc_hash_pte32_t *pte) { - CPUPPCState *env = &cpu->env; hwaddr pteg_off, pte_offset; hwaddr hash; uint32_t vsid, pgidx, ptem; @@ -353,21 +352,22 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); /* Primary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=%" PRIx32 " ptem=%" PRIx32 " hash=" TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, vsid, ptem, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), + vsid, ptem, hash); pteg_off = get_pteg_offset32(cpu, hash); pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte); if (pte_offset == -1) { /* Secondary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=%" PRIx32 " api=%" PRIx32 - " hash=" TARGET_FMT_plx "\n", env->htab_base, - env->htab_mask, vsid, ptem, ~hash); + " hash=" TARGET_FMT_plx "\n", ppc_hash32_hpt_base(cpu), + ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash); pteg_off = get_pteg_offset32(cpu, ~hash); pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte); } diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 5b9fb08..898021f 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -44,6 +44,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, /* * Hash page table definitions */ +#define SDR_32_HTABORG 0xFFFF0000UL +#define SDR_32_HTABMASK 0x000001FFUL #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_32 8 @@ -65,42 +67,46 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, #define HPTE32_R_WIMG 0x00000078 #define HPTE32_R_PP 0x00000003 +static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu) +{ + return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG; +} + +static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu) +{ + return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF; +} + static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, hwaddr pte_offset) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - return ldl_phys(CPU(cpu)->as, env->htab_base + pte_offset); + return ldl_phys(CPU(cpu)->as, base + pte_offset); } static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu, hwaddr pte_offset) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - return ldl_phys(CPU(cpu)->as, - env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2); + return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); } static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, hwaddr pte_offset, target_ulong pte0) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - stl_phys(CPU(cpu)->as, env->htab_base + pte_offset, pte0); + stl_phys(CPU(cpu)->as, base + pte_offset, pte0); } static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, hwaddr pte_offset, target_ulong pte1) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - stl_phys(CPU(cpu)->as, - env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); + stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } typedef struct { diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 76669ed..d44f2bb 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -27,6 +27,7 @@ #include "kvm_ppc.h" #include "mmu-hash64.h" #include "exec/log.h" +#include "hw/hw.h" //#define DEBUG_SLB @@ -37,12 +38,6 @@ #endif /* - * Used to indicate that a CPU has its hash page table (HPT) managed - * within the host kernel - */ -#define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1) - -/* * SLB handling */ @@ -294,55 +289,6 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb) return rt; } -/* - * 64-bit hash table MMU handling - */ -void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - target_ulong htabsize = value & SDR_64_HTABSIZE; - - env->spr[SPR_SDR1] = value; - if (htabsize > 28) { - error_setg(errp, - "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1", - htabsize); - htabsize = 28; - } - env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1; - env->htab_base = value & SDR_64_HTABORG; -} - -void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - Error *local_err = NULL; - - if (hpt) { - env->external_htab = hpt; - } else { - env->external_htab = MMU_HASH64_KVM_MANAGED_HPT; - } - ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18), - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - /* Not strictly necessary, but makes it clearer that an external - * htab is in use when debugging */ - env->htab_base = -1; - - if (kvm_enabled()) { - if (kvmppc_put_books_sregs(cpu) < 0) { - error_setg(errp, "Unable to update SDR1 in KVM"); - } - } -} - static int ppc_hash64_pte_prot(PowerPCCPU *cpu, ppc_slb_t *slb, ppc_hash_pte64_t pte) { @@ -431,34 +377,43 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte) return prot; } -uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index) +const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, + hwaddr ptex, int n) { - uint64_t token = 0; - hwaddr pte_offset; + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr plen = n * HASH_PTE_SIZE_64; + const ppc_hash_pte64_t *hptes; + + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + return vhc->map_hptes(cpu->vhyp, ptex, n); + } - pte_offset = pte_index * HASH_PTE_SIZE_64; - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - /* - * HTAB is controlled by KVM. Fetch the PTEG into a new buffer. - */ - token = kvmppc_hash64_read_pteg(cpu, pte_index); - } else if (cpu->env.external_htab) { - /* - * HTAB is controlled by QEMU. Just point to the internally - * accessible PTEG. - */ - token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset; - } else if (cpu->env.htab_base) { - token = cpu->env.htab_base + pte_offset; + if (!base) { + return NULL; } - return token; + + hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false); + if (plen < (n * HASH_PTE_SIZE_64)) { + hw_error("%s: Unable to map all requested HPTEs\n", __func__); + } + return hptes; } -void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token) +void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) { - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - kvmppc_hash64_free_pteg(token); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); + return; } + + address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64, + false, n * HASH_PTE_SIZE_64); } static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps, @@ -503,20 +458,19 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, target_ulong ptem, ppc_hash_pte64_t *pte, unsigned *pshift) { - CPUPPCState *env = &cpu->env; int i; - uint64_t token; + const ppc_hash_pte64_t *pteg; target_ulong pte0, pte1; - target_ulong pte_index; + target_ulong ptex; - pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP; - token = ppc_hash64_start_access(cpu, pte_index); - if (!token) { + ptex = (hash & ppc_hash64_hpt_mask(cpu)) * HPTES_PER_GROUP; + pteg = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + if (!pteg) { return -1; } for (i = 0; i < HPTES_PER_GROUP; i++) { - pte0 = ppc_hash64_load_hpte0(cpu, token, i); - pte1 = ppc_hash64_load_hpte1(cpu, token, i); + pte0 = ppc_hash64_hpte0(cpu, pteg, i); + pte1 = ppc_hash64_hpte1(cpu, pteg, i); /* This compares V, B, H (secondary) and the AVPN */ if (HPTE64_V_COMPARE(pte0, ptem)) { @@ -536,11 +490,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, */ pte->pte0 = pte0; pte->pte1 = pte1; - ppc_hash64_stop_access(cpu, token); - return (pte_index + i) * HASH_PTE_SIZE_64; + ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP); + return ptex + i; } } - ppc_hash64_stop_access(cpu, token); + ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP); /* * We didn't find a valid entry. */ @@ -552,8 +506,7 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu, ppc_hash_pte64_t *pte, unsigned *pshift) { CPUPPCState *env = &cpu->env; - hwaddr pte_offset; - hwaddr hash; + hwaddr hash, ptex; uint64_t vsid, epnmask, epn, ptem; const struct ppc_one_seg_page_size *sps = slb->sps; @@ -588,29 +541,30 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), hash); /* Primary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx " hash=" TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, vsid, ptem, hash); - pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift); + ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), + vsid, ptem, hash); + ptex = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift); - if (pte_offset == -1) { + if (ptex == -1) { /* Secondary PTEG lookup */ ptem |= HPTE64_V_SECONDARY; qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx - " hash=" TARGET_FMT_plx "\n", env->htab_base, - env->htab_mask, vsid, ptem, ~hash); + " hash=" TARGET_FMT_plx "\n", ppc_hash64_hpt_base(cpu), + ppc_hash64_hpt_mask(cpu), vsid, ptem, ~hash); - pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift); + ptex = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift); } - return pte_offset; + return ptex; } unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu, @@ -708,7 +662,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, CPUPPCState *env = &cpu->env; ppc_slb_t *slb; unsigned apshift; - hwaddr pte_offset; + hwaddr ptex; ppc_hash_pte64_t pte; int pp_prot, amr_prot, prot; uint64_t new_pte1, dsisr; @@ -792,8 +746,8 @@ skip_slb_search: } /* 4. Locate the PTE in the hash table */ - pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift); - if (pte_offset == -1) { + ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift); + if (ptex == -1) { dsisr = 0x40000000; if (rwx == 2) { ppc_hash64_set_isi(cs, env, dsisr); @@ -806,7 +760,7 @@ skip_slb_search: return 1; } qemu_log_mask(CPU_LOG_MMU, - "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset); + "found PTE at index %08" HWADDR_PRIx "\n", ptex); /* 5. Check access permissions */ @@ -849,8 +803,7 @@ skip_slb_search: } if (new_pte1 != pte.pte1) { - ppc_hash64_store_hpte(cpu, pte_offset / HASH_PTE_SIZE_64, - pte.pte0, new_pte1); + ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1); } /* 7. Determine the real address from the PTE */ @@ -867,7 +820,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) { CPUPPCState *env = &cpu->env; ppc_slb_t *slb; - hwaddr pte_offset, raddr; + hwaddr ptex, raddr; ppc_hash_pte64_t pte; unsigned apshift; @@ -900,8 +853,8 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) } } - pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift); - if (pte_offset == -1) { + ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift); + if (ptex == -1) { return -1; } @@ -909,30 +862,24 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) & TARGET_PAGE_MASK; } -void ppc_hash64_store_hpte(PowerPCCPU *cpu, - target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1) { - CPUPPCState *env = &cpu->env; + hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr offset = ptex * HASH_PTE_SIZE_64; - if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - kvmppc_hash64_write_pte(env, pte_index, pte0, pte1); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1); return; } - pte_index *= HASH_PTE_SIZE_64; - if (env->external_htab) { - stq_p(env->external_htab + pte_index, pte0); - stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64 / 2, pte1); - } else { - stq_phys(CPU(cpu)->as, env->htab_base + pte_index, pte0); - stq_phys(CPU(cpu)->as, - env->htab_base + pte_index + HASH_PTE_SIZE_64 / 2, pte1); - } + stq_phys(CPU(cpu)->as, base + offset, pte0); + stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1); } -void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, - target_ulong pte_index, +void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex, target_ulong pte0, target_ulong pte1) { /* diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index 7a0b7fc..54f1e37 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -10,8 +10,8 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot, hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr); int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, int mmu_idx); -void ppc_hash64_store_hpte(PowerPCCPU *cpu, target_ulong index, - target_ulong pte0, target_ulong pte1); +void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1); void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong pte_index, target_ulong pte0, target_ulong pte1); @@ -56,6 +56,9 @@ void ppc_hash64_update_rmls(CPUPPCState *env); * Hash page table definitions */ +#define SDR_64_HTABORG 0x0FFFFFFFFFFC0000ULL +#define SDR_64_HTABSIZE 0x000000000000001FULL + #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) @@ -91,45 +94,41 @@ void ppc_hash64_update_rmls(CPUPPCState *env); #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL -void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, - Error **errp); -void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, - Error **errp); - -uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index); -void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token); - -static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu, - uint64_t token, int index) +static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) { - CPUPPCState *env = &cpu->env; - uint64_t addr; - - addr = token + (index * HASH_PTE_SIZE_64); - if (env->external_htab) { - return ldq_p((const void *)(uintptr_t)addr); - } else { - return ldq_phys(CPU(cpu)->as, addr); - } + return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG; } -static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu, - uint64_t token, int index) +static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) { - CPUPPCState *env = &cpu->env; - uint64_t addr; - - addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2; - if (env->external_htab) { - return ldq_p((const void *)(uintptr_t)addr); - } else { - return ldq_phys(CPU(cpu)->as, addr); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + return vhc->hpt_mask(cpu->vhyp); } + return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1; } -typedef struct { +struct ppc_hash_pte64 { uint64_t pte0, pte1; -} ppc_hash_pte64_t; +}; + +const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, + hwaddr ptex, int n); +void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n); + +static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, int i) +{ + return ldq_p(&(hptes[i].pte0)); +} + +static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, int i) +{ + return ldq_p(&(hptes[i].pte1)); +} #endif /* CONFIG_USER_ONLY */ diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index eb2d482..a1af3d6 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -28,6 +28,7 @@ #include "exec/cpu_ldst.h" #include "exec/log.h" #include "helper_regs.h" +#include "qemu/error-report.h" //#define DEBUG_MMU //#define DEBUG_BATS @@ -466,6 +467,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, int rw, int type) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); hwaddr hash; target_ulong vsid; int ds, pr, target_page_bits; @@ -503,7 +505,7 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); ctx->hash[0] = hash; ctx->hash[1] = ~hash; @@ -518,9 +520,11 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, uint32_t a0, a1, a2, a3; qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx - "\n", env->htab_base, env->htab_mask + 0x80); - for (curaddr = env->htab_base; - curaddr < (env->htab_base + env->htab_mask + 0x80); + "\n", ppc_hash32_hpt_base(cpu), + ppc_hash32_hpt_mask(env) + 0x80); + for (curaddr = ppc_hash32_hpt_base(cpu); + curaddr < (ppc_hash32_hpt_base(cpu) + + ppc_hash32_hpt_mask(cpu) + 0x80); curaddr += 16) { a0 = ldl_phys(cs->as, curaddr); a1 = ldl_phys(cs->as, curaddr + 4); @@ -1205,12 +1209,13 @@ static void mmu6xx_dump_BATs(FILE *f, fprintf_function cpu_fprintf, static void mmu6xx_dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); ppc6xx_tlb_t *tlb; target_ulong sr; int type, way, entry, i; - cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", env->htab_base); - cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", env->htab_mask); + cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_base(cpu)); + cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_mask(cpu)); cpu_fprintf(f, "\nSegment registers:\n"); for (i = 0; i < 32; i++) { @@ -1592,9 +1597,9 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address, env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem; tlb_miss: env->error_code |= ctx.key << 19; - env->spr[SPR_HASH1] = env->htab_base + + env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + get_pteg_offset32(cpu, ctx.hash[0]); - env->spr[SPR_HASH2] = env->htab_base + + env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + get_pteg_offset32(cpu, ctx.hash[1]); break; case POWERPC_MMU_SOFT_74xx: @@ -1997,26 +2002,28 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) /* Special registers manipulation */ void ppc_store_sdr1(CPUPPCState *env, target_ulong value) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value); - assert(!env->external_htab); - env->spr[SPR_SDR1] = value; + assert(!cpu->vhyp); #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); - Error *local_err = NULL; + target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE; + target_ulong htabsize = value & SDR_64_HTABSIZE; - ppc_hash64_set_sdr1(cpu, value, &local_err); - if (local_err) { - error_report_err(local_err); - error_free(local_err); + if (value & ~sdr_mask) { + error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1", + value & ~sdr_mask); + value &= sdr_mask; + } + if (htabsize > 28) { + error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1", + htabsize); + return; } - } else -#endif /* defined(TARGET_PPC64) */ - { - /* FIXME: Should check for valid HTABMASK values */ - env->htab_mask = ((value & SDR_32_HTABMASK) << 16) | 0xFFFF; - env->htab_base = value & SDR_32_HTABORG; } +#endif /* defined(TARGET_PPC64) */ + /* FIXME: Should check for valid HTABMASK values in 32-bit case */ + env->spr[SPR_SDR1] = value; } /* Segment registers load and store */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 3ba2616..6e6868b 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -71,7 +71,7 @@ static TCGv cpu_lr; #if defined(TARGET_PPC64) static TCGv cpu_cfar; #endif -static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca; +static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -173,6 +173,10 @@ void ppc_translate_init(void) offsetof(CPUPPCState, ov), "OV"); cpu_ca = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, ca), "CA"); + cpu_ov32 = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, ov32), "OV32"); + cpu_ca32 = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, ca32), "CA32"); cpu_reserve = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_addr), @@ -806,12 +810,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, } tcg_temp_free(t0); if (NARROW_MODE(ctx)) { - tcg_gen_ext32s_tl(cpu_ov, cpu_ov); + tcg_gen_extract_tl(cpu_ov, cpu_ov, 31, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + } else { + if (is_isa300(ctx)) { + tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1); + } + tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1); } - tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1); tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } +static inline void gen_op_arith_compute_ca32(DisasContext *ctx, + TCGv res, TCGv arg0, TCGv arg1, + int sub) +{ + TCGv t0; + + if (!is_isa300(ctx)) { + return; + } + + t0 = tcg_temp_new(); + if (sub) { + tcg_gen_eqv_tl(t0, arg0, arg1); + } else { + tcg_gen_xor_tl(t0, arg0, arg1); + } + tcg_gen_xor_tl(t0, t0, res); + tcg_gen_extract_tl(cpu_ca32, t0, 32, 1); + tcg_temp_free(t0); +} + /* Common add function */ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -838,6 +870,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_temp_free(t1); tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */ tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ca32, cpu_ca); + } } else { TCGv zero = tcg_const_tl(0); if (add_ca) { @@ -846,6 +881,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } else { tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero); } + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0); tcg_temp_free(zero); } } else { @@ -985,6 +1021,9 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, } if (compute_ov) { tcg_gen_extu_i32_tl(cpu_ov, t2); + if (is_isa300(ctx)) { + tcg_gen_extu_i32_tl(cpu_ov32, t2); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } tcg_temp_free_i32(t0); @@ -1056,6 +1095,9 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, } if (compute_ov) { tcg_gen_mov_tl(cpu_ov, t2); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, t2); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } tcg_temp_free_i64(t0); @@ -1074,10 +1116,10 @@ static void glue(gen_, name)(DisasContext *ctx) cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ sign, compute_ov); \ } -/* divwu divwu. divwuo divwuo. */ +/* divdu divdu. divduo divduo. */ GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0); GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ +/* divd divd. divdo divdo. */ GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0); GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1); @@ -1249,6 +1291,9 @@ static void gen_mullwo(DisasContext *ctx) tcg_gen_sari_i32(t0, t0, 31); tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); tcg_gen_extu_i32_tl(cpu_ov, t0); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); tcg_temp_free_i32(t0); @@ -1310,6 +1355,9 @@ static void gen_mulldo(DisasContext *ctx) tcg_gen_sari_i64(t0, t0, 63); tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); tcg_temp_free_i64(t0); @@ -1353,17 +1401,22 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_temp_free(t1); tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */ tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ca32, cpu_ca); + } } else if (add_ca) { TCGv zero, inv1 = tcg_temp_new(); tcg_gen_not_tl(inv1, arg1); zero = tcg_const_tl(0); tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); + gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0); tcg_temp_free(zero); tcg_temp_free(inv1); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); tcg_gen_sub_tl(t0, arg2, arg1); + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1); } } else if (add_ca) { /* Since we're ignoring carry-out, we can simplify the @@ -1442,7 +1495,10 @@ static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) static void gen_neg(DisasContext *ctx) { - gen_op_arith_neg(ctx, 0); + tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode))) { + gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + } } static void gen_nego(DisasContext *ctx) @@ -3703,7 +3759,7 @@ static void gen_tdi(DisasContext *ctx) /*** Processor control ***/ -static void gen_read_xer(TCGv dst) +static void gen_read_xer(DisasContext *ctx, TCGv dst) { TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); @@ -3715,6 +3771,12 @@ static void gen_read_xer(TCGv dst) tcg_gen_or_tl(t0, t0, t1); tcg_gen_or_tl(dst, dst, t2); tcg_gen_or_tl(dst, dst, t0); + if (is_isa300(ctx)) { + tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32); + tcg_gen_or_tl(dst, dst, t0); + tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32); + tcg_gen_or_tl(dst, dst, t0); + } tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); @@ -3722,14 +3784,16 @@ static void gen_read_xer(TCGv dst) static void gen_write_xer(TCGv src) { + /* Write all flags, while reading back check for isa300 */ tcg_gen_andi_tl(cpu_xer, src, - ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA))); - tcg_gen_shri_tl(cpu_so, src, XER_SO); - tcg_gen_shri_tl(cpu_ov, src, XER_OV); - tcg_gen_shri_tl(cpu_ca, src, XER_CA); - tcg_gen_andi_tl(cpu_so, cpu_so, 1); - tcg_gen_andi_tl(cpu_ov, cpu_ov, 1); - tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + ~((1u << XER_SO) | + (1u << XER_OV) | (1u << XER_OV32) | + (1u << XER_CA) | (1u << XER_CA32))); + tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1); + tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1); + tcg_gen_extract_tl(cpu_so, src, XER_SO, 1); + tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1); + tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1); } /* mcrxr */ @@ -3755,6 +3819,28 @@ static void gen_mcrxr(DisasContext *ctx) tcg_gen_movi_tl(cpu_ca, 0); } +#ifdef TARGET_PPC64 +/* mcrxrx */ +static void gen_mcrxrx(DisasContext *ctx) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)]; + + /* copy OV and OV32 */ + tcg_gen_shli_tl(t0, cpu_ov, 1); + tcg_gen_or_tl(t0, t0, cpu_ov32); + tcg_gen_shli_tl(t0, t0, 2); + /* copy CA and CA32 */ + tcg_gen_shli_tl(t1, cpu_ca, 1); + tcg_gen_or_tl(t1, t1, cpu_ca32); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_trunc_tl_i32(dst, t0); + tcg_temp_free(t0); + tcg_temp_free(t1); +} +#endif + /* mfcr mfocrf */ static void gen_mfcr(DisasContext *ctx) { @@ -6424,6 +6510,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC), #if defined(TARGET_PPC64) GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B), GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(mcrxrx, 0x1F, 0x00, 0x12, 0x007FF801, PPC_NONE, PPC2_ISA300), #endif GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC), GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC), diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index be35cbd..37f74be 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -107,7 +107,7 @@ static void spr_access_nop(DisasContext *ctx, int sprn, int gprn) /* XER */ static void spr_read_xer (DisasContext *ctx, int gprn, int sprn) { - gen_read_xer(cpu_gpr[gprn]); + gen_read_xer(ctx, cpu_gpr[gprn]); } static void spr_write_xer (DisasContext *ctx, int sprn, int gprn) @@ -740,10 +740,22 @@ static void gen_spr_ne_601 (CPUPPCState *env) &spr_read_decr, &spr_write_decr, 0x00000000); /* Memory management */ - spr_register(env, SPR_SDR1, "SDR1", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_sdr1, - 0x00000000); +#ifndef CONFIG_USER_ONLY + if (env->has_hv_mode) { + /* SDR1 is a hypervisor resource on CPUs which have a + * hypervisor mode */ + spr_register_hv(env, SPR_SDR1, "SDR1", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sdr1, + 0x00000000); + } else { + spr_register(env, SPR_SDR1, "SDR1", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sdr1, + 0x00000000); + } +#endif } /* BATs 0-3 */ @@ -8835,18 +8847,14 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) } #if !defined(CONFIG_USER_ONLY) - -void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) -{ - cpu->vhyp = vhyp; -} - -void cpu_ppc_set_papr(PowerPCCPU *cpu) +void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) { CPUPPCState *env = &cpu->env; ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR]; ppc_spr_t *amor = &env->spr_cb[SPR_AMOR]; + cpu->vhyp = vhyp; + /* PAPR always has exception vectors in RAM not ROM. To ensure this, * MSR[IP] should never be set. * @@ -10489,11 +10497,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) #else cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug; cc->vmsd = &vmstate_ppc_cpu; -#if defined(TARGET_PPC64) - cc->write_elf64_note = ppc64_cpu_write_elf64_note; -#endif #endif cc->cpu_exec_enter = ppc_cpu_exec_enter; +#if defined(CONFIG_SOFTMMU) + cc->write_elf64_note = ppc64_cpu_write_elf64_note; + cc->write_elf32_note = ppc32_cpu_write_elf32_note; +#endif cc->gdb_num_core_regs = 71; diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 6d227a5..290de6d 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, +static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, TCGArg b, bool b_const, TCGLabel *l) { intptr_t offset; @@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, +static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, bool const_bl, bool const_bh, bool sub) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 5404805..d54018d 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -144,7 +144,7 @@ static void free_test_data(test_data *data) g_free(temp->asl_file); } - g_array_free(data->tables, false); + g_array_free(data->tables, true); } static uint8_t acpi_checksum(const uint8_t *data, int len) diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker new file mode 100644 index 0000000..bbb21ed --- /dev/null +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -0,0 +1,22 @@ +# +# Docker s390 cross-compiler target +# +# This docker target is based on stretch (testing) as the stable build +# doesn't have the cross compiler available. +# +FROM debian:testing-slim + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Add the s390x architecture +RUN dpkg --add-architecture s390x + +# Grab the updated list of packages +RUN apt update +RUN apt dist-upgrade -yy +RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install +RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- diff --git a/tests/e1000-test.c b/tests/e1000-test.c index 59cab68..0c5fcdc 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c @@ -44,6 +44,7 @@ int main(int argc, char **argv) path = g_strdup_printf("e1000/%s", models[i]); qtest_add_data_func(path, models[i], test_device); + g_free(path); } return g_test_run(); diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c index 8c42ca9..c612dc6 100644 --- a/tests/e1000e-test.c +++ b/tests/e1000e-test.c @@ -99,7 +99,10 @@ static QPCIBus *test_bus; static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data) { - *(QPCIDevice **) data = dev; + QPCIDevice **res = data; + + g_assert_null(*res); + *res = dev; } static QPCIDevice *e1000e_device_find(QPCIBus *bus) @@ -403,6 +406,7 @@ static void data_test_clear(e1000e_device *d) e1000e_device_clear(test_bus, d); close(test_sockets[0]); pc_alloc_uninit(test_alloc); + g_free(d->pci_dev); qpci_free_pc(test_bus); qtest_end(); } diff --git a/tests/eepro100-test.c b/tests/eepro100-test.c index ed23258..bdc8a67 100644 --- a/tests/eepro100-test.c +++ b/tests/eepro100-test.c @@ -54,6 +54,7 @@ int main(int argc, char **argv) path = g_strdup_printf("eepro100/%s", models[i]); qtest_add_data_func(path, models[i], test_device); + g_free(path); } return g_test_run(); diff --git a/tests/endianness-test.c b/tests/endianness-test.c index cf8d41b..ed0bf52 100644 --- a/tests/endianness-test.c +++ b/tests/endianness-test.c @@ -295,14 +295,17 @@ int main(int argc, char **argv) path = g_strdup_printf("endianness/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness); + g_free(path); path = g_strdup_printf("endianness/split/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness_split); + g_free(path); path = g_strdup_printf("endianness/combine/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness_combine); + g_free(path); } return g_test_run(); diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c index 6176e81..24870b3 100644 --- a/tests/hd-geo-test.c +++ b/tests/hd-geo-test.c @@ -19,6 +19,8 @@ #include "qemu-common.h" #include "libqtest.h" +#define ARGV_SIZE 256 + static char *create_test_img(int secs) { char *template = strdup("/tmp/qtest.XXXXXX"); @@ -66,7 +68,7 @@ static const CHST hd_chst[backend_last][mbr_last] = { }, }; -static const char *img_file_name[backend_last]; +static char *img_file_name[backend_last]; static const CHST *cur_ide[4]; @@ -234,28 +236,36 @@ static int setup_ide(int argc, char *argv[], int argv_sz, */ static void test_ide_none(void) { - char *argv[256]; - - setup_common(argv, ARRAY_SIZE(argv)); - qtest_start(g_strjoinv(" ", argv)); + char **argv = g_new0(char *, ARGV_SIZE); + char *args; + + setup_common(argv, ARGV_SIZE); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } static void test_ide_mbr(bool use_device, MBRcontents mbr) { - char *argv[256]; + char **argv = g_new0(char *, ARGV_SIZE); + char *args; int argc; Backend i; const char *dev; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); for (i = 0; i < backend_last; i++) { cur_ide[i] = &hd_chst[i][mbr]; dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), i, dev, i, mbr, ""); + argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, ""); } - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -310,12 +320,13 @@ static void test_ide_device_mbr_chs(void) static void test_ide_drive_user(const char *dev, bool trans) { - char *argv[256], *opts; + char **argv = g_new0(char *, ARGV_SIZE); + char *args, *opts; int argc; int secs = img_secs[backend_small]; const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans }; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d", dev ?: "", trans && dev ? "bios-chs-" : "", @@ -323,11 +334,14 @@ static void test_ide_drive_user(const char *dev, bool trans) expected_chst.cyls, expected_chst.heads, expected_chst.secs); cur_ide[0] = &expected_chst; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), + argc = setup_ide(argc, argv, ARGV_SIZE, 0, dev ? opts : NULL, backend_small, mbr_chs, dev ? "" : opts); g_free(opts); - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -369,18 +383,22 @@ static void test_ide_device_user_chst(void) */ static void test_ide_drive_cd_0(void) { - char *argv[256]; + char **argv = g_new0(char *, ARGV_SIZE); + char *args; int argc, ide_idx; Backend i; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); for (i = 0; i <= backend_empty; i++) { ide_idx = backend_empty - i; cur_ide[ide_idx] = &hd_chst[i][mbr_blank]; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), + argc = setup_ide(argc, argv, ARGV_SIZE, ide_idx, NULL, i, mbr_blank, ""); } - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -418,6 +436,7 @@ int main(int argc, char **argv) for (i = 0; i < backend_last; i++) { if (img_file_name[i]) { unlink(img_file_name[i]); + free(img_file_name[i]); } } diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c index da2d5a5..e9d05c8 100644 --- a/tests/i440fx-test.c +++ b/tests/i440fx-test.c @@ -134,6 +134,8 @@ static void test_i440fx_defaults(gconstpointer opaque) /* 3.2.26 */ g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */ + g_free(dev); + qpci_free_pc(bus); qtest_end(); } @@ -270,6 +272,9 @@ static void test_i440fx_pam(gconstpointer opaque) /* Verify the area is not our new mask */ g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82)); } + + g_free(dev); + qpci_free_pc(bus); qtest_end(); } diff --git a/tests/ide-test.c b/tests/ide-test.c index b57c2b1..139ebc0 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -339,6 +339,7 @@ static void test_bmdma_simple_rw(void) g_assert(memcmp(buf, cmpbuf, len) == 0); + free_pci_device(dev); g_free(buf); g_free(cmpbuf); } @@ -369,6 +370,7 @@ static void test_bmdma_short_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_one_sector_short_prdt(void) @@ -398,6 +400,7 @@ static void test_bmdma_one_sector_short_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_long_prdt(void) @@ -426,6 +429,7 @@ static void test_bmdma_long_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_no_busmaster(void) @@ -449,6 +453,7 @@ static void test_bmdma_no_busmaster(void) * in practice. At least we want to be aware of any changes. */ g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_setup(void) @@ -525,6 +530,7 @@ static void test_identify(void) assert_bit_set(buf[85], 0x20); ide_test_quit(); + free_pci_device(dev); } /* @@ -563,6 +569,7 @@ static void make_dirty(uint8_t device) assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); g_free(buf); + free_pci_device(dev); } static void test_flush(void) @@ -609,6 +616,7 @@ static void test_flush(void) assert_bit_clear(data, BSY | DF | ERR | DRQ); ide_test_quit(); + free_pci_device(dev); } static void test_retry_flush(const char *machine) @@ -659,6 +667,7 @@ static void test_retry_flush(const char *machine) assert_bit_clear(data, BSY | DF | ERR | DRQ); ide_test_quit(); + free_pci_device(dev); } static void test_flush_nodev(void) @@ -676,6 +685,7 @@ static void test_flush_nodev(void) /* Just testing that qemu doesn't crash... */ + free_pci_device(dev); ide_test_quit(); } @@ -742,6 +752,7 @@ static uint8_t ide_wait_clear(uint8_t flag) while (true) { data = qpci_io_readb(dev, ide_bar, reg_status); if (!(data & flag)) { + free_pci_device(dev); return data; } if (difftime(time(NULL), st) > 5.0) { @@ -851,6 +862,7 @@ static void cdrom_pio_impl(int nblocks) g_free(pattern); g_free(rx); test_bmdma_teardown(); + free_pci_device(dev); } static void test_cdrom_pio(void) diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c index e84dd68..7e21a9b 100644 --- a/tests/ipmi-bt-test.c +++ b/tests/ipmi-bt-test.c @@ -420,6 +420,7 @@ int main(int argc, char **argv) " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", emu_port); qtest_start(cmdline); + g_free(cmdline); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/ipmi/extern/connect", test_connect); qtest_add_func("/ipmi/extern/bt_base", test_bt_base); diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c index 9cf0b34..178ffc1 100644 --- a/tests/ipmi-kcs-test.c +++ b/tests/ipmi-kcs-test.c @@ -279,6 +279,7 @@ int main(int argc, char **argv) cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-kcs,bmc=bmc0"); qtest_start(cmdline); + g_free(cmdline); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/ipmi/local/kcs_base", test_kcs_base); qtest_add_func("/ipmi/local/kcs_abort", test_kcs_abort); diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c index 72d7a96..0cdfaec 100644 --- a/tests/libqos/usb.c +++ b/tests/libqos/usb.c @@ -24,6 +24,11 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar) hc->bar = qpci_iomap(hc->dev, bar, NULL); } +void uhci_deinit(struct qhc *hc) +{ + g_free(hc->dev); +} + void uhci_port_test(struct qhc *hc, int port, uint16_t expect) { uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port); @@ -64,4 +69,5 @@ void usb_test_hotplug(const char *hcd_id, const int port, g_assert(response); g_assert(qdict_haskey(response, "event")); g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED")); + QDECREF(response); } diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h index 423dcfd..297cfc5 100644 --- a/tests/libqos/usb.h +++ b/tests/libqos/usb.h @@ -11,6 +11,7 @@ struct qhc { void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar); void uhci_port_test(struct qhc *hc, int port, uint16_t expect); +void uhci_deinit(struct qhc *hc); void usb_test_hotplug(const char *bus_name, const int port, void (*port_check)(void)); diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index d4bf841..7ac15c0 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -24,9 +24,17 @@ typedef struct QVirtioPCIForeachData { void (*func)(QVirtioDevice *d, void *data); uint16_t device_type; + bool has_slot; + int slot; void *user_data; } QVirtioPCIForeachData; +void qvirtio_pci_device_free(QVirtioPCIDevice *dev) +{ + g_free(dev->pdev); + g_free(dev); +} + static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev) { QVirtioPCIDevice *vpcidev; @@ -49,16 +57,18 @@ static void qvirtio_pci_foreach_callback( QVirtioPCIForeachData *d = data; QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev); - if (vpcidev->vdev.device_type == d->device_type) { + if (vpcidev->vdev.device_type == d->device_type && + (!d->has_slot || vpcidev->pdev->devfn == d->slot << 3)) { d->func(&vpcidev->vdev, d->user_data); } else { - g_free(vpcidev); + qvirtio_pci_device_free(vpcidev); } } static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data) { QVirtioPCIDevice **vpcidev = data; + assert(!*vpcidev); *vpcidev = (QVirtioPCIDevice *)d; } @@ -284,21 +294,39 @@ const QVirtioBus qvirtio_pci = { .virtqueue_kick = qvirtio_pci_virtqueue_kick, }; -void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, +static void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, + bool has_slot, int slot, void (*func)(QVirtioDevice *d, void *data), void *data) { QVirtioPCIForeachData d = { .func = func, .device_type = device_type, + .has_slot = has_slot, + .slot = slot, .user_data = data }; qpci_device_foreach(bus, PCI_VENDOR_ID_REDHAT_QUMRANET, -1, - qvirtio_pci_foreach_callback, &d); + qvirtio_pci_foreach_callback, &d); } QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type) { QVirtioPCIDevice *dev = NULL; - qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev); + + qvirtio_pci_foreach(bus, device_type, false, 0, + qvirtio_pci_assign_device, &dev); + + dev->vdev.bus = &qvirtio_pci; + + return dev; +} + +QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus, + uint16_t device_type, int slot) +{ + QVirtioPCIDevice *dev = NULL; + + qvirtio_pci_foreach(bus, device_type, true, slot, + qvirtio_pci_assign_device, &dev); dev->vdev.bus = &qvirtio_pci; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 38c54c6..6ef1909 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -31,9 +31,11 @@ typedef struct QVirtQueuePCI { extern const QVirtioBus qvirtio_pci; -void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, - void (*func)(QVirtioDevice *d, void *data), void *data); QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type); +QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus, + uint16_t device_type, int slot); +void qvirtio_pci_device_free(QVirtioPCIDevice *dev); + void qvirtio_pci_device_enable(QVirtioPCIDevice *d); void qvirtio_pci_device_disable(QVirtioPCIDevice *d); diff --git a/tests/libqtest.c b/tests/libqtest.c index e54354d..3a0e0d6 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -805,17 +805,7 @@ void qtest_add_data_func_full(const char *str, void *data, GDestroyNotify data_free_func) { gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); -#if GLIB_CHECK_VERSION(2, 34, 0) g_test_add_data_func_full(path, data, fn, data_free_func); -#elif GLIB_CHECK_VERSION(2, 26, 0) - /* back-compat casts, remove this once we can require new-enough glib */ - g_test_add_vtable(path, 0, data, NULL, - (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func); -#else - /* back-compat casts, remove this once we can require new-enough glib */ - g_test_add_vtable(path, 0, data, NULL, - (void (*)(void)) fn, (void (*)(void)) data_free_func); -#endif g_free(path); } diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index dafe8be..de35a18 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -482,7 +482,7 @@ static void test_migrate(void) usleep(10 * 1000); } while (dest_byte_a == dest_byte_b); - qmp("{ 'execute' : 'stop'}"); + qmp_discard_response("{ 'execute' : 'stop'}"); /* With it stopped, check nothing changes */ qtest_memread(to, start_address, &dest_byte_c, 1); sleep(1); diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c index 21d4ebb..8a1b0a3 100644 --- a/tests/ptimer-test-stubs.c +++ b/tests/ptimer-test-stubs.c @@ -108,6 +108,11 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) return bh; } +void qemu_bh_delete(QEMUBH *bh) +{ + g_free(bh); +} + void replay_bh_schedule_event(QEMUBH *bh) { bh->cb(bh->opaque); diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c index b36a476..5d1a2a8 100644 --- a/tests/ptimer-test.c +++ b/tests/ptimer-test.c @@ -73,6 +73,7 @@ static void check_set_count(gconstpointer arg) ptimer_set_count(ptimer, 1000); g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_set_limit(gconstpointer arg) @@ -92,6 +93,7 @@ static void check_set_limit(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000); g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_oneshot(gconstpointer arg) @@ -194,6 +196,7 @@ static void check_oneshot(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_periodic(gconstpointer arg) @@ -360,6 +363,7 @@ static void check_periodic(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, (no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0)); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_mode_change(gconstpointer arg) @@ -406,6 +410,7 @@ static void check_on_the_fly_mode_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_period_change(gconstpointer arg) @@ -438,6 +443,7 @@ static void check_on_the_fly_period_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_freq_change(gconstpointer arg) @@ -470,6 +476,7 @@ static void check_on_the_fly_freq_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_run_with_period_0(gconstpointer arg) @@ -487,6 +494,7 @@ static void check_run_with_period_0(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_run_with_delta_0(gconstpointer arg) @@ -591,6 +599,7 @@ static void check_run_with_delta_0(gconstpointer arg) g_assert_true(triggered); ptimer_stop(ptimer); + ptimer_free(ptimer); } static void check_periodic_with_load_0(gconstpointer arg) @@ -649,6 +658,7 @@ static void check_periodic_with_load_0(gconstpointer arg) } ptimer_stop(ptimer); + ptimer_free(ptimer); } static void check_oneshot_with_load_0(gconstpointer arg) @@ -682,14 +692,14 @@ static void check_oneshot_with_load_0(gconstpointer arg) } else { g_assert_false(triggered); } + + ptimer_free(ptimer); } static void add_ptimer_tests(uint8_t policy) { - uint8_t *ppolicy = g_malloc(1); - char *policy_name = g_malloc0(256); - - *ppolicy = policy; + char policy_name[256] = ""; + char *tmp; if (policy == PTIMER_POLICY_DEFAULT) { g_sprintf(policy_name, "default"); @@ -715,49 +725,67 @@ static void add_ptimer_tests(uint8_t policy) g_strlcat(policy_name, "no_counter_rounddown,", 256); } - g_test_add_data_func( - g_strdup_printf("/ptimer/set_count policy=%s", policy_name), - ppolicy, check_set_count); - - g_test_add_data_func( - g_strdup_printf("/ptimer/set_limit policy=%s", policy_name), - ppolicy, check_set_limit); - - g_test_add_data_func( - g_strdup_printf("/ptimer/oneshot policy=%s", policy_name), - ppolicy, check_oneshot); - - g_test_add_data_func( - g_strdup_printf("/ptimer/periodic policy=%s", policy_name), - ppolicy, check_periodic); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name), - ppolicy, check_on_the_fly_mode_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name), - ppolicy, check_on_the_fly_period_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name), - ppolicy, check_on_the_fly_freq_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name), - ppolicy, check_run_with_period_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name), - ppolicy, check_run_with_delta_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name), - ppolicy, check_periodic_with_load_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name), - ppolicy, check_oneshot_with_load_0); + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/set_count policy=%s", policy_name), + g_memdup(&policy, 1), check_set_count, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/set_limit policy=%s", policy_name), + g_memdup(&policy, 1), check_set_limit, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/oneshot policy=%s", policy_name), + g_memdup(&policy, 1), check_oneshot, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/periodic policy=%s", policy_name), + g_memdup(&policy, 1), check_periodic, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_mode_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_period_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_freq_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/run_with_period_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_run_with_period_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_run_with_delta_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_periodic_with_load_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_oneshot_with_load_0, g_free); + g_free(tmp); } static void add_all_ptimer_policies_comb_tests(void) diff --git a/tests/pvpanic-test.c b/tests/pvpanic-test.c index 3bfa678..71ebb5c 100644 --- a/tests/pvpanic-test.c +++ b/tests/pvpanic-test.c @@ -27,6 +27,7 @@ static void test_panic(void) data = qdict_get_qdict(response, "data"); g_assert(qdict_haskey(data, "action")); g_assert_cmpstr(qdict_get_str(data, "action"), ==, "pause"); + QDECREF(response); } int main(int argc, char **argv) diff --git a/tests/q35-test.c b/tests/q35-test.c index 763fe3d..cc58f3e 100644 --- a/tests/q35-test.c +++ b/tests/q35-test.c @@ -71,6 +71,9 @@ static void test_smram_lock(void) g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true); g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true); + + g_free(pcidev); + qpci_free_pc(pcibus); } int main(int argc, char **argv) diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4673b67..34e66db 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1024' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1k' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte @@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a size -You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index e206ad6..c6f4eef 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -179,7 +179,7 @@ q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: Can't use a read-only drive +(qemu) QEMU_PROG: Block node is read-only QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on @@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 1d3fd04..aafcd24 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") self.vm.launch() def tearDown(self): @@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase): qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt) + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 08e4bb7..182acb4 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node used as backing hd === -{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} === Invalid command - snapshot node has a backing image === diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 3ba79f0..6d8f0a1 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -67,7 +67,7 @@ test_blockjob() _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'x-blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ - 'error' + 'error' | _filter_generated_node_ids _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'block-job-cancel', diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 195ca1a..82e763b 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} @@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 6b7edaf..54b5329 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -28,6 +28,7 @@ Testing: opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 @@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 @@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 @@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 @@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 @@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 @@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 @@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 @@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 @@ -964,6 +1009,7 @@ Testing: -device floppy opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -device floppy,drive-type=120 @@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -device floppy,drive-type=144 @@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -device floppy,drive-type=288 @@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 @@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 @@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 diff --git a/tests/tco-test.c b/tests/tco-test.c index ef02ec5..c4c264e 100644 --- a/tests/tco-test.c +++ b/tests/tco-test.c @@ -42,11 +42,18 @@ typedef struct { bool noreboot; QPCIDevice *dev; QPCIBar tco_io_bar; + QPCIBus *bus; } TestData; +static void test_end(TestData *d) +{ + g_free(d->dev); + qpci_free_pc(d->bus); + qtest_end(); +} + static void test_init(TestData *d) { - QPCIBus *bus; QTestState *qs; char *s; @@ -57,8 +64,8 @@ static void test_init(TestData *d) qtest_irq_intercept_in(qs, "ioapic"); g_free(s); - bus = qpci_init_pc(NULL); - d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00)); + d->bus = qpci_init_pc(NULL); + d->dev = qpci_device_find(d->bus, QPCI_DEVFN(0x1f, 0x00)); g_assert(d->dev != NULL); qpci_device_enable(d->dev); @@ -148,7 +155,7 @@ static void test_tco_defaults(void) SW_IRQ_GEN_DEFAULT); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==, TCO_TMR_DEFAULT); - qtest_end(); + test_end(&d); } static void test_tco_timeout(void) @@ -192,7 +199,7 @@ static void test_tco_timeout(void) g_assert(ret == 1); stop_tco(&d); - qtest_end(); + test_end(&d); } static void test_tco_max_timeout(void) @@ -225,7 +232,7 @@ static void test_tco_max_timeout(void) g_assert(ret == 1); stop_tco(&d); - qtest_end(); + test_end(&d); } static QDict *get_watchdog_action(void) @@ -262,7 +269,7 @@ static void test_tco_second_timeout_pause(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_reset(void) @@ -287,7 +294,7 @@ static void test_tco_second_timeout_reset(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_shutdown(void) @@ -312,7 +319,7 @@ static void test_tco_second_timeout_shutdown(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_none(void) @@ -337,7 +344,7 @@ static void test_tco_second_timeout_none(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_ticks_counter(void) @@ -365,7 +372,7 @@ static void test_tco_ticks_counter(void) } while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT)); stop_tco(&d); - qtest_end(); + test_end(&d); } static void test_tco1_control_bits(void) @@ -383,7 +390,7 @@ static void test_tco1_control_bits(void) qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==, TCO_LOCK); - qtest_end(); + test_end(&d); } static void test_tco1_status_bits(void) @@ -412,7 +419,7 @@ static void test_tco1_status_bits(void) g_assert(ret == 1); qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0); - qtest_end(); + test_end(&d); } static void test_tco2_status_bits(void) @@ -439,7 +446,7 @@ static void test_tco2_status_bits(void) g_assert(ret == 1); qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0); - qtest_end(); + test_end(&d); } int main(int argc, char **argv) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index f6dfd08..4ccbda1 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations, g_assert_nonnull(bs); snprintf(job_id, sizeof(job_id), "job%u", counter++); - s = block_job_create(job_id, &test_block_job_driver, bs, 0, - BLOCK_JOB_DEFAULT, test_block_job_cb, - data, &error_abort); + s = block_job_create(job_id, &test_block_job_driver, bs, + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, + test_block_job_cb, data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 068c9e4..740e740 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, BlockJob *job; Error *errp = NULL; - job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); + job = block_job_create(id, &test_block_job_driver, blk_bs(blk), + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, + NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); @@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. */ static BlockBackend *create_blk(const char *name) { - BlockBackend *blk = blk_new(); + /* No I/O is performed on this device */ + BlockBackend *blk = blk_new(0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); g_assert_nonnull(bs); - blk_insert_bs(blk, bs); + blk_insert_bs(blk, bs, &error_abort); bdrv_unref(bs); if (name) { diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c index ffaaffa..9f84402 100644 --- a/tests/test-filter-mirror.c +++ b/tests/test-filter-mirror.c @@ -57,7 +57,7 @@ static void test_mirror(void) }; /* send a qmp command to guarantee that 'connected' is setting to true. */ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf)); g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size)); close(send_sock[0]); diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c index c63b68f..0c4b8d5 100644 --- a/tests/test-filter-redirector.c +++ b/tests/test-filter-redirector.c @@ -99,7 +99,7 @@ static void test_redirector_tx(void) g_assert_cmpint(recv_sock, !=, -1); /* send a qmp command to guarantee that 'connected' is setting to true. */ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); struct iovec iov[] = { { @@ -184,7 +184,7 @@ static void test_redirector_rx(void) send_sock = unix_connect(sock_path1, NULL); g_assert_cmpint(send_sock, !=, -1); /* send a qmp command to guarantee that 'connected' is setting to true. */ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf)); g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size)); diff --git a/tests/test-io-channel-command.c b/tests/test-io-channel-command.c index 1d1f461..46ce1ff 100644 --- a/tests/test-io-channel-command.c +++ b/tests/test-io-channel-command.c @@ -29,8 +29,8 @@ static void test_io_channel_command_fifo(bool async) #define TEST_FIFO "tests/test-io-channel-command.fifo" QIOChannel *src, *dst; QIOChannelTest *test; - char *srcfifo = g_strdup_printf("PIPE:%s,wronly", TEST_FIFO); - char *dstfifo = g_strdup_printf("PIPE:%s,rdonly", TEST_FIFO); + const char *srcfifo = "PIPE:" TEST_FIFO ",wronly"; + const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly"; const char *srcargv[] = { "/bin/socat", "-", srcfifo, NULL, }; @@ -59,8 +59,6 @@ static void test_io_channel_command_fifo(bool async) object_unref(OBJECT(src)); object_unref(OBJECT(dst)); - g_free(srcfifo); - g_free(dstfifo); unlink(TEST_FIFO); } diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 363b59a..bd7c501 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,9 +593,10 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new(); - blk2 = blk_new(); - blk3 = blk_new(); + /* No actual I/O is performed on these devices */ + blk1 = blk_new(0, BLK_PERM_ALL); + blk2 = blk_new(0, BLK_PERM_ALL); + blk3 = blk_new(0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 39f338a..f694a89 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = { } }; +/* test array migration */ + #define AR_SIZE 4 typedef struct { @@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = { VMSTATE_END_OF_LIST() } }; + +static uint8_t wire_arr_ptr_no0[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + static void test_arr_ptr_str_no0_save(void) { TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; save_vmstate(&vmsd_arps, &sample); - compare_vmstate(wire_sample, sizeof(wire_sample)); + compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); } static void test_arr_ptr_str_no0_load(void) @@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void) TestStructTriv ar[AR_SIZE] = {}; TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; int idx; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; - save_buffer(wire_sample, sizeof(wire_sample)); + save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); + SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, + wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); + } +} + +static uint8_t wire_arr_ptr_0[] = { + 0x00, 0x00, 0x00, 0x00, + VMS_NULLPTR_MARKER, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + +static void test_arr_ptr_str_0_save(void) +{ + TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; + TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arps, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_str_0_load(void) +{ + TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} }; + TestStructTriv ar[AR_SIZE] = {}; + TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, - wire_sample, sizeof(wire_sample))); + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); for (idx = 0; idx < AR_SIZE; ++idx) { /* compare the target array ar with the ground truth array ar_gt */ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); } + for (idx = 0; idx < AR_SIZE; ++idx) { + if (idx == 1) { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0); + } else { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0); + } + } +} + +typedef struct TestArrayOfPtrToInt { + int32_t *ar[AR_SIZE]; +} TestArrayOfPtrToInt; + +const VMStateDescription vmsd_arpp = { + .name = "test/arps", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt, + AR_SIZE, 0, vmstate_info_int32, int32_t*), + VMSTATE_END_OF_LIST() + } +}; + +static void test_arr_ptr_prim_0_save(void) +{ + int32_t ar[AR_SIZE] = {0 , 1, 2, 3}; + TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arpp, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_prim_0_load(void) +{ + int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3}; + int32_t ar[AR_SIZE] = {3 , 42, 1, 0}; + TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); + SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1, + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + if (idx == 1) { + g_assert_cmpint(42, ==, ar[idx]); + } else { + g_assert_cmpint(ar_gt[idx], ==, ar[idx]); + } + } } /* test QTAILQ migration */ @@ -781,6 +862,13 @@ int main(int argc, char **argv) test_arr_ptr_str_no0_save); g_test_add_func("/vmstate/array/ptr/str/no0/load", test_arr_ptr_str_no0_load); + g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save); + g_test_add_func("/vmstate/array/ptr/str/0/load", + test_arr_ptr_str_0_load); + g_test_add_func("/vmstate/array/ptr/prim/0/save", + test_arr_ptr_prim_0_save); + g_test_add_func("/vmstate/array/ptr/prim/0/load", + test_arr_ptr_prim_0_load); g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q); g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q); g_test_add_func("/vmstate/tmp_struct", test_tmp_struct); diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c index 57af8a0..944eb1c 100644 --- a/tests/usb-hcd-ehci-test.c +++ b/tests/usb-hcd-ehci-test.c @@ -50,11 +50,8 @@ static void ehci_port_test(struct qhc *hc, int port, uint32_t expect) /* tests */ -static void pci_init(void) +static void test_init(void) { - if (pcibus) { - return; - } pcibus = qpci_init_pc(NULL); g_assert(pcibus != NULL); @@ -64,6 +61,15 @@ static void pci_init(void) qusb_pci_init_one(pcibus, &ehci1, QPCI_DEVFN(0x1d, 7), 0); } +static void test_deinit(void) +{ + uhci_deinit(&uhci1); + uhci_deinit(&uhci2); + uhci_deinit(&uhci3); + uhci_deinit(&ehci1); + qpci_free_pc(pcibus); +} + static void pci_uhci_port_1(void) { g_assert(pcibus != NULL); @@ -142,7 +148,7 @@ int main(int argc, char **argv) int ret; g_test_init(&argc, &argv, NULL); - qtest_add_func("/ehci/pci/init", pci_init); + qtest_add_func("/ehci/pci/uhci-port-1", pci_uhci_port_1); qtest_add_func("/ehci/pci/ehci-port-1", pci_ehci_port_1); qtest_add_func("/ehci/pci/ehci-config", pci_ehci_config); @@ -161,7 +167,10 @@ int main(int argc, char **argv) "-drive if=none,id=usbcdrom,media=cdrom " "-device usb-tablet,bus=ich9-ehci-1.0,port=1,usb_version=1 " "-device usb-storage,bus=ich9-ehci-1.0,port=2,drive=usbcdrom "); + + test_init(); ret = g_test_run(); + test_deinit(); qtest_end(); diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index e956b9c..f25bae5 100644 --- a/tests/usb-hcd-uhci-test.c +++ b/tests/usb-hcd-uhci-test.c @@ -28,6 +28,7 @@ static void test_port(int port) g_assert(port > 0); qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4); uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS); + uhci_deinit(&uhci); } static void test_port_1(void) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 2c45c7b..a61896c 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -139,6 +139,7 @@ enum { }; typedef struct TestServer { + QPCIBus *bus; gchar *socket_path; gchar *mig_path; gchar *chr_name; @@ -160,14 +161,13 @@ static const char *root; static void init_virtio_dev(TestServer *s) { - QPCIBus *bus; QVirtioPCIDevice *dev; uint32_t features; - bus = qpci_init_pc(NULL); - g_assert_nonnull(bus); + s->bus = qpci_init_pc(NULL); + g_assert_nonnull(s->bus); - dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET); + dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET); g_assert_nonnull(dev); qvirtio_pci_device_enable(dev); @@ -180,6 +180,7 @@ static void init_virtio_dev(TestServer *s) qvirtio_set_features(&dev->vdev, features); qvirtio_set_driver_ok(&dev->vdev); + qvirtio_pci_device_free(dev); } static void wait_for_fds(TestServer *s) @@ -507,6 +508,8 @@ static gboolean _test_server_free(TestServer *server) g_free(server->mig_path); g_free(server->chr_name); + qpci_free_pc(server->bus); + g_free(server); return FALSE; diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c index 9556291..43a1ad8 100644 --- a/tests/virtio-9p-test.c +++ b/tests/virtio-9p-test.c @@ -80,7 +80,7 @@ static void qvirtio_9p_pci_stop(QVirtIO9P *v9p) { qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc); qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev)); - g_free(v9p->dev); + qvirtio_pci_device_free((QVirtioPCIDevice *)v9p->dev); qvirtio_9p_stop(v9p); } diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 0e32e41..1eee95d 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -108,7 +108,7 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) { QVirtioPCIDevice *dev; - dev = qvirtio_pci_device_find(bus, VIRTIO_ID_BLOCK); + dev = qvirtio_pci_device_find_slot(bus, VIRTIO_ID_BLOCK, slot); g_assert(dev != NULL); g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK); g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN)); @@ -296,7 +296,7 @@ static void pci_basic(void) /* End test */ qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -389,7 +389,7 @@ static void pci_indirect(void) /* End test */ qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -409,15 +409,16 @@ static void pci_config(void) qvirtio_set_driver_ok(&dev->vdev); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, n_size / 512); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -458,8 +459,9 @@ static void pci_msix(void) qvirtio_set_driver_ok(&dev->vdev); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); @@ -524,7 +526,7 @@ static void pci_msix(void) qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -640,7 +642,7 @@ static void pci_idx(void) qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -659,7 +661,7 @@ static void pci_hotplug(void) dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP); g_assert(dev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); /* unplug secondary disk */ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { @@ -691,8 +693,9 @@ static void mmio_basic(void) test_basic(&dev->vdev, alloc, vq); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US); diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 69220ef..0eabd56 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -63,7 +63,7 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs) qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc); } qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev)); - g_free(vs->dev); + qvirtio_pci_device_free((QVirtioPCIDevice *)vs->dev); qvirtio_scsi_stop(vs->qs); g_free(vs); } diff --git a/util/qemu-option.c b/util/qemu-option.c index 419f252..5ce1b5c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value, err = qemu_strtosz(value, NULL, &size); if (err == -ERANGE) { - error_setg(errp, "Value '%s' is too large for parameter '%s'", + error_setg(errp, "Value '%s' is out of range for parameter '%s'", value, name); return; } diff --git a/util/qemu-timer.c b/util/qemu-timer.c index ff620ec..6cf70b9 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -355,11 +355,6 @@ void timer_deinit(QEMUTimer *ts) ts->timer_list = NULL; } -void timer_free(QEMUTimer *ts) -{ - g_free(ts); -} - static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) { QEMUTimer **pt, *t; |