diff options
90 files changed, 4749 insertions, 982 deletions
diff --git a/COPYING.PYTHON b/COPYING.PYTHON deleted file mode 100644 index 4d3f1ef..0000000 --- a/COPYING.PYTHON +++ /dev/null @@ -1,270 +0,0 @@ -A. HISTORY OF THE SOFTWARE -========================== - -Python was created in the early 1990s by Guido van Rossum at Stichting -Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands -as a successor of a language called ABC. Guido remains Python's -principal author, although it includes many contributions from others. - -In 1995, Guido continued his work on Python at the Corporation for -National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) -in Reston, Virginia where he released several versions of the -software. - -In May 2000, Guido and the Python core development team moved to -BeOpen.com to form the BeOpen PythonLabs team. In October of the same -year, the PythonLabs team moved to Digital Creations (now Zope -Corporation, see http://www.zope.com). In 2001, the Python Software -Foundation (PSF, see http://www.python.org/psf/) was formed, a -non-profit organization created specifically to own Python-related -Intellectual Property. Zope Corporation is a sponsoring member of -the PSF. - -All Python releases are Open Source (see http://www.opensource.org for -the Open Source Definition). Historically, most, but not all, Python -releases have also been GPL-compatible; the table below summarizes -the various releases. - - Release Derived Year Owner GPL- - from compatible? (1) - - 0.9.0 thru 1.2 1991-1995 CWI yes - 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes - 1.6 1.5.2 2000 CNRI no - 2.0 1.6 2000 BeOpen.com no - 1.6.1 1.6 2001 CNRI yes (2) - 2.1 2.0+1.6.1 2001 PSF no - 2.0.1 2.0+1.6.1 2001 PSF yes - 2.1.1 2.1+2.0.1 2001 PSF yes - 2.2 2.1.1 2001 PSF yes - 2.1.2 2.1.1 2002 PSF yes - 2.1.3 2.1.2 2002 PSF yes - 2.2.1 2.2 2002 PSF yes - 2.2.2 2.2.1 2002 PSF yes - 2.2.3 2.2.2 2003 PSF yes - 2.3 2.2.2 2002-2003 PSF yes - 2.3.1 2.3 2002-2003 PSF yes - 2.3.2 2.3.1 2002-2003 PSF yes - 2.3.3 2.3.2 2002-2003 PSF yes - 2.3.4 2.3.3 2004 PSF yes - 2.3.5 2.3.4 2005 PSF yes - 2.4 2.3 2004 PSF yes - 2.4.1 2.4 2005 PSF yes - 2.4.2 2.4.1 2005 PSF yes - 2.4.3 2.4.2 2006 PSF yes - 2.5 2.4 2006 PSF yes - 2.7 2.6 2010 PSF yes - -Footnotes: - -(1) GPL-compatible doesn't mean that we're distributing Python under - the GPL. All Python licenses, unlike the GPL, let you distribute - a modified version without making your changes open source. The - GPL-compatible licenses make it possible to combine Python with - other software that is released under the GPL; the others don't. - -(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, - because its license has a choice of law clause. According to - CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 - is "not incompatible" with the GPL. - -Thanks to the many outside volunteers who have worked under Guido's -direction to make these releases possible. - - -B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON -=============================================================== - -PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 --------------------------------------------- - -1. This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, PSF -hereby grants Licensee a nonexclusive, royalty-free, world-wide -license to reproduce, analyze, test, perform and/or display publicly, -prepare derivative works, distribute, and otherwise use Python -alone or in any derivative version, provided, however, that PSF's -License Agreement and PSF's notice of copyright, i.e., "Copyright (c) -2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights -Reserved" are retained in Python alone or in any derivative version -prepared by Licensee. - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. - -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using Python, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. - - -BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 -------------------------------------------- - -BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 - -1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an -office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the -Individual or Organization ("Licensee") accessing and otherwise using -this software in source or binary form and its associated -documentation ("the Software"). - -2. Subject to the terms and conditions of this BeOpen Python License -Agreement, BeOpen hereby grants Licensee a non-exclusive, -royalty-free, world-wide license to reproduce, analyze, test, perform -and/or display publicly, prepare derivative works, distribute, and -otherwise use the Software alone or in any derivative version, -provided, however, that the BeOpen Python License is retained in the -Software, alone or in any derivative version prepared by Licensee. - -3. BeOpen is making the Software available to Licensee on an "AS IS" -basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE -SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS -AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY -DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -5. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -6. This License Agreement shall be governed by and interpreted in all -respects by the law of the State of California, excluding conflict of -law provisions. Nothing in this License Agreement shall be deemed to -create any relationship of agency, partnership, or joint venture -between BeOpen and Licensee. This License Agreement does not grant -permission to use BeOpen trademarks or trade names in a trademark -sense to endorse or promote products or services of Licensee, or any -third party. As an exception, the "BeOpen Python" logos available at -http://www.pythonlabs.com/logos.html may be used according to the -permissions granted on that web page. - -7. By copying, installing or otherwise using the software, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. - - -CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 ---------------------------------------- - -1. This LICENSE AGREEMENT is between the Corporation for National -Research Initiatives, having an office at 1895 Preston White Drive, -Reston, VA 20191 ("CNRI"), and the Individual or Organization -("Licensee") accessing and otherwise using Python 1.6.1 software in -source or binary form and its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, CNRI -hereby grants Licensee a nonexclusive, royalty-free, world-wide -license to reproduce, analyze, test, perform and/or display publicly, -prepare derivative works, distribute, and otherwise use Python 1.6.1 -alone or in any derivative version, provided, however, that CNRI's -License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) -1995-2001 Corporation for National Research Initiatives; All Rights -Reserved" are retained in Python 1.6.1 alone or in any derivative -version prepared by Licensee. Alternately, in lieu of CNRI's License -Agreement, Licensee may substitute the following text (omitting the -quotes): "Python 1.6.1 is made available subject to the terms and -conditions in CNRI's License Agreement. This Agreement together with -Python 1.6.1 may be located on the Internet using the following -unique, persistent identifier (known as a handle): 1895.22/1013. This -Agreement may also be obtained from a proxy server on the Internet -using the following URL: http://hdl.handle.net/1895.22/1013". - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python 1.6.1 or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python 1.6.1. - -4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" -basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. This License Agreement shall be governed by the federal -intellectual property law of the United States, including without -limitation the federal copyright law, and, to the extent such -U.S. federal law does not apply, by the law of the Commonwealth of -Virginia, excluding Virginia's conflict of law provisions. -Notwithstanding the foregoing, with regard to derivative works based -on Python 1.6.1 that incorporate non-separable material that was -previously distributed under the GNU General Public License (GPL), the -law of the Commonwealth of Virginia shall govern this License -Agreement only as to issues arising under or with respect to -Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this -License Agreement shall be deemed to create any relationship of -agency, partnership, or joint venture between CNRI and Licensee. This -License Agreement does not grant permission to use CNRI trademarks or -trade name in a trademark sense to endorse or promote products or -services of Licensee, or any third party. - -8. By clicking on the "ACCEPT" button where indicated, or by copying, -installing or otherwise using Python 1.6.1, Licensee agrees to be -bound by the terms and conditions of this License Agreement. - - ACCEPT - - -CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 --------------------------------------------------- - -Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, -The Netherlands. All rights reserved. - -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Stichting Mathematisch -Centrum or CWI not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO -THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE -FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/MAINTAINERS b/MAINTAINERS index 0fb5f38..da91501 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1331,6 +1331,12 @@ F: hw/display/bochs-display.c F: include/hw/display/vga.h F: include/hw/display/bochs-vbe.h +ramfb +M: Gerd Hoffmann <kraxel@redhat.com> +S: Maintained +F: hw/display/ramfb*.c +F: include/hw/display/ramfb.h + virtio-gpu M: Gerd Hoffmann <kraxel@redhat.com> S: Maintained @@ -333,6 +333,10 @@ BlockDriverState *bdrv_new(void) qemu_co_queue_init(&bs->flush_queue); + for (i = 0; i < bdrv_drain_all_count; i++) { + bdrv_drained_begin(bs); + } + QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); return bs; @@ -818,7 +822,13 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_drained_begin(bs); + bdrv_do_drained_begin_quiesce(bs, NULL, false); +} + +static bool bdrv_child_cb_drained_poll(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + return bdrv_drain_poll(bs, false, NULL, false); } static void bdrv_child_cb_drained_end(BdrvChild *child) @@ -902,9 +912,11 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, @@ -926,9 +938,11 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, @@ -1043,11 +1057,13 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, } const BdrvChildRole child_backing = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .attach = bdrv_backing_attach, .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, @@ -1152,7 +1168,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, int open_flags, Error **errp) { Error *local_err = NULL; - int ret; + int i, ret; bdrv_assign_node_name(bs, node_name, &local_err); if (local_err) { @@ -1200,6 +1216,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, assert(bdrv_min_mem_align(bs) != 0); assert(is_power_of_2(bs->bl.request_alignment)); + for (i = 0; i < bs->quiesce_counter; i++) { + if (drv->bdrv_co_drain_begin) { + drv->bdrv_co_drain_begin(bs); + } + } + return 0; open_failed: bs->drv = NULL; @@ -2021,7 +2043,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child, child->role->detach(child); } if (old_bs->quiesce_counter && child->role->drained_end) { - for (i = 0; i < old_bs->quiesce_counter; i++) { + int num = old_bs->quiesce_counter; + if (child->role->parent_is_bds) { + num -= bdrv_drain_all_count; + } + assert(num >= 0); + for (i = 0; i < num; i++) { child->role->drained_end(child); } } @@ -2033,7 +2060,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); if (new_bs->quiesce_counter && child->role->drained_begin) { - for (i = 0; i < new_bs->quiesce_counter; i++) { + int num = new_bs->quiesce_counter; + if (child->role->parent_is_bds) { + num -= bdrv_drain_all_count; + } + assert(num >= 0); + for (i = 0; i < num; i++) { child->role->drained_begin(child); } } @@ -3395,16 +3427,39 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) return false; } - if (c->role == &child_backing) { - /* If @from is a backing file of @to, ignore the child to avoid - * creating a loop. We only want to change the pointer of other - * parents. */ - QLIST_FOREACH(to_c, &to->children, next) { - if (to_c == c) { - break; - } - } - if (to_c) { + /* If the child @c belongs to the BDS @to, replacing the current + * c->bs by @to would mean to create a loop. + * + * Such a case occurs when appending a BDS to a backing chain. + * For instance, imagine the following chain: + * + * guest device -> node A -> further backing chain... + * + * Now we create a new BDS B which we want to put on top of this + * chain, so we first attach A as its backing node: + * + * node B + * | + * v + * guest device -> node A -> further backing chain... + * + * Finally we want to replace A by B. When doing that, we want to + * replace all pointers to A by pointers to B -- except for the + * pointer from B because (1) that would create a loop, and (2) + * that pointer should simply stay intact: + * + * guest device -> node B + * | + * v + * node A -> further backing chain... + * + * In general, when replacing a node A (c->bs) by a node B (@to), + * if A is a child of B, that means we cannot replace A by B there + * because that would create a loop. Silently detaching A from B + * is also not really an option. So overall just leaving A in + * place there is the most sensible choice. */ + QLIST_FOREACH(to_c, &to->children, next) { + if (to_c == c) { return false; } } @@ -3430,6 +3485,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->bs == from); if (!should_update_child(c, to)) { continue; } @@ -4037,6 +4093,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs) return QTAILQ_NEXT(bs, node_list); } +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) +{ + if (!bs) { + return QTAILQ_FIRST(&all_bdrv_states); + } + return QTAILQ_NEXT(bs, bs_list); +} + const char *bdrv_get_node_name(const BlockDriverState *bs) { return bs->node_name; @@ -4948,7 +5012,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) AioContext *ctx = bdrv_get_aio_context(bs); aio_disable_external(ctx); - bdrv_parent_drained_begin(bs, NULL); + bdrv_parent_drained_begin(bs, NULL, false); bdrv_drain(bs); /* ensure there are no in-flight requests */ while (aio_poll(ctx, false)) { @@ -4962,7 +5026,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) */ aio_context_acquire(new_context); bdrv_attach_aio_context(bs, new_context); - bdrv_parent_drained_end(bs, NULL); + bdrv_parent_drained_end(bs, NULL, false); aio_enable_external(ctx); aio_context_release(new_context); } diff --git a/block/backup.c b/block/backup.c index 5661435..d18be40 100644 --- a/block/backup.c +++ b/block/backup.c @@ -354,7 +354,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) HBitmapIter hbi; hbitmap_iter_init(&hbi, job->copy_bitmap, 0); - while ((cluster = hbitmap_iter_next(&hbi)) != -1) { + while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) { do { if (yield_and_check(job)) { return 0; diff --git a/block/block-backend.c b/block/block-backend.c index 2d1a346..6b75bca 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -767,6 +767,11 @@ void blk_remove_bs(BlockBackend *blk) blk_update_root_state(blk); + /* bdrv_root_unref_child() will cause blk->root to become stale and may + * switch to a completion coroutine later on. Let's drain all I/O here + * to avoid that and a potential QEMU crash. + */ + blk_drain(blk); bdrv_root_unref_child(blk->root); blk->root = NULL; } diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 383d742..db1782e 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -519,7 +519,62 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - return hbitmap_iter_next(&iter->hbi); + return hbitmap_iter_next(&iter->hbi, true); +} + +/** + * Return the next consecutively dirty area in the dirty bitmap + * belonging to the given iterator @iter. + * + * @max_offset: Maximum value that may be returned for + * *offset + *bytes + * @offset: Will contain the start offset of the next dirty area + * @bytes: Will contain the length of the next dirty area + * + * Returns: True if a dirty area could be found before max_offset + * (which means that *offset and *bytes then contain valid + * values), false otherwise. + * + * Note that @iter is never advanced if false is returned. If an area + * is found (which means that true is returned), it will be advanced + * past that area. + */ +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, + uint64_t *offset, int *bytes) +{ + uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap); + uint64_t gran_max_offset; + int64_t ret; + int size; + + if (max_offset == iter->bitmap->size) { + /* If max_offset points to the image end, round it up by the + * bitmap granularity */ + gran_max_offset = ROUND_UP(max_offset, granularity); + } else { + gran_max_offset = max_offset; + } + + ret = hbitmap_iter_next(&iter->hbi, false); + if (ret < 0 || ret + granularity > gran_max_offset) { + return false; + } + + *offset = ret; + size = 0; + + assert(granularity <= INT_MAX); + + do { + /* Advance iterator */ + ret = hbitmap_iter_next(&iter->hbi, true); + size += granularity; + } while (ret + granularity <= gran_max_offset && + hbitmap_iter_next(&iter->hbi, false) == ret + granularity && + size <= INT_MAX - granularity); + + *bytes = MIN(size, max_offset - *offset); + return true; } /* Called within bdrv_dirty_bitmap_lock..unlock */ @@ -38,15 +38,18 @@ /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +static AioWait drain_all_aio_wait; + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } if (c->role->drained_begin) { @@ -55,12 +58,13 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) } } -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } if (c->role->drained_end) { @@ -69,6 +73,24 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } +static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) +{ + BdrvChild *c, *next; + bool busy = false; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { + continue; + } + if (c->role->drained_poll) { + busy |= c->role->drained_poll(c); + } + } + + return busy; +} + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) { dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); @@ -148,7 +170,9 @@ typedef struct { bool done; bool begin; bool recursive; + bool poll; BdrvChild *parent; + bool ignore_bds_parents; } BdrvCoDrainData; static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) @@ -164,67 +188,83 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) /* Set data->done before reading bs->wakeup. */ atomic_mb_set(&data->done, true); - bdrv_wakeup(bs); + bdrv_dec_in_flight(bs); + + if (data->begin) { + g_free(data); + } } /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) { - BdrvChild *child, *tmp; - BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; + BdrvCoDrainData *data; if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || (!begin && !bs->drv->bdrv_co_drain_end)) { return; } - data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); - bdrv_coroutine_enter(bs, data.co); - BDRV_POLL_WHILE(bs, !data.done); + data = g_new(BdrvCoDrainData, 1); + *data = (BdrvCoDrainData) { + .bs = bs, + .done = false, + .begin = begin + }; - if (recursive) { - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { - bdrv_drain_invoke(child->bs, begin, true); - } + /* Make sure the driver callback completes during the polling phase for + * drain_begin. */ + bdrv_inc_in_flight(bs); + data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); + aio_co_schedule(bdrv_get_aio_context(bs), data->co); + + if (!begin) { + BDRV_POLL_WHILE(bs, !data->done); + g_free(data); } } -static bool bdrv_drain_recurse(BlockDriverState *bs) +/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents) { - BdrvChild *child, *tmp; - bool waited; + BdrvChild *child, *next; - /* Wait for drained requests to finish */ - waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); - - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { - BlockDriverState *bs = child->bs; - bool in_main_loop = - qemu_get_current_aio_context() == qemu_get_aio_context(); - assert(bs->refcnt > 0); - if (in_main_loop) { - /* In case the recursive bdrv_drain_recurse processes a - * block_job_defer_to_main_loop BH and modifies the graph, - * let's hold a reference to bs until we are done. - * - * IOThread doesn't have such a BH, and it is not safe to call - * bdrv_unref without BQL, so skip doing it there. - */ - bdrv_ref(bs); - } - waited |= bdrv_drain_recurse(bs); - if (in_main_loop) { - bdrv_unref(bs); + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { + return true; + } + + if (atomic_read(&bs->in_flight)) { + return true; + } + + if (recursive) { + assert(!ignore_bds_parents); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + if (bdrv_drain_poll(child->bs, recursive, child, false)) { + return true; + } } } - return waited; + return false; +} + +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent) +{ + /* Execute pending BHs first and check everything else only after the BHs + * have executed. */ + while (aio_poll(bs->aio_context, false)); + + return bdrv_drain_poll(bs, recursive, ignore_parent, false); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent); + BdrvChild *parent, bool ignore_bds_parents, + bool poll); static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent); + BdrvChild *parent, bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -232,11 +272,18 @@ static void bdrv_co_drain_bh_cb(void *opaque) Coroutine *co = data->co; BlockDriverState *bs = data->bs; - bdrv_dec_in_flight(bs); - if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent); + if (bs) { + bdrv_dec_in_flight(bs); + if (data->begin) { + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); + } else { + bdrv_do_drained_end(bs, data->recursive, data->parent, + data->ignore_bds_parents); + } } else { - bdrv_do_drained_end(bs, data->recursive, data->parent); + assert(data->begin); + bdrv_drain_all_begin(); } data->done = true; @@ -245,7 +292,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, bool recursive, - BdrvChild *parent) + BdrvChild *parent, + bool ignore_bds_parents, + bool poll) { BdrvCoDrainData data; @@ -260,8 +309,12 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .begin = begin, .recursive = recursive, .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, }; - bdrv_inc_in_flight(bs); + if (bs) { + bdrv_inc_in_flight(bs); + } aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data); @@ -271,79 +324,106 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, assert(data.done); } -void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent) +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents) { - BdrvChild *child, *next; - - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent); - return; - } + assert(!qemu_in_coroutine()); /* Stop things in parent-to-child order */ if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); } - bdrv_parent_drained_begin(bs, parent); - bdrv_drain_invoke(bs, true, false); - bdrv_drain_recurse(bs); + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); + bdrv_drain_invoke(bs, true); +} + +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll) +{ + BdrvChild *child, *next; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, + poll); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); if (recursive) { + assert(!ignore_bds_parents); bs->recursive_quiesce_counter++; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_begin(child->bs, true, child); + bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, + false); } } + + /* + * Wait for drained requests to finish. + * + * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The + * call is needed so things in this AioContext can make progress even + * though we don't return to the main AioContext loop - this automatically + * includes other nodes in the same AioContext and therefore all child + * nodes. + */ + if (poll) { + assert(!ignore_bds_parents); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); + } } void bdrv_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, false, NULL); + bdrv_do_drained_begin(bs, false, NULL, false, true); } void bdrv_subtree_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, true, NULL); + bdrv_do_drained_begin(bs, true, NULL, false, true); } -void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent) +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents) { BdrvChild *child, *next; int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent); + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, + false); return; } assert(bs->quiesce_counter > 0); old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); /* Re-enable things in child-to-parent order */ - bdrv_drain_invoke(bs, false, false); - bdrv_parent_drained_end(bs, parent); + bdrv_drain_invoke(bs, false); + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); if (old_quiesce_counter == 1) { aio_enable_external(bdrv_get_aio_context(bs)); } if (recursive) { + assert(!ignore_bds_parents); bs->recursive_quiesce_counter--; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_end(child->bs, true, child); + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); } } } void bdrv_drained_end(BlockDriverState *bs) { - bdrv_do_drained_end(bs, false, NULL); + bdrv_do_drained_end(bs, false, NULL, false); } void bdrv_subtree_drained_end(BlockDriverState *bs) { - bdrv_do_drained_end(bs, true, NULL); + bdrv_do_drained_end(bs, true, NULL, false); } void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) @@ -351,7 +431,7 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) int i; for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_begin(child->bs, true, child); + bdrv_do_drained_begin(child->bs, true, child, false, true); } } @@ -360,7 +440,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) int i; for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child); + bdrv_do_drained_end(child->bs, true, child, false); } } @@ -370,10 +450,6 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) * * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState * AioContext. - * - * Only this BlockDriverState's AioContext is run, so in-flight requests must - * not depend on events in other AioContexts. In that case, use - * bdrv_drain_all() instead. */ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) { @@ -388,6 +464,39 @@ void bdrv_drain(BlockDriverState *bs) bdrv_drained_end(bs); } +static void bdrv_drain_assert_idle(BlockDriverState *bs) +{ + BdrvChild *child, *next; + + assert(atomic_read(&bs->in_flight) == 0); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_drain_assert_idle(child->bs); + } +} + +unsigned int bdrv_drain_all_count = 0; + +static bool bdrv_drain_all_poll(void) +{ + BlockDriverState *bs = NULL; + bool result = false; + + /* Execute pending BHs first (may modify the graph) and check everything + * else only after the BHs have executed. */ + while (aio_poll(qemu_get_aio_context(), false)); + + /* bdrv_drain_poll() can't make changes to the graph and we are holding the + * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + result |= bdrv_drain_poll(bs, false, NULL, true); + aio_context_release(aio_context); + } + + return result; +} + /* * Wait for pending requests to complete across all BlockDriverStates * @@ -402,73 +511,51 @@ void bdrv_drain(BlockDriverState *bs) */ void bdrv_drain_all_begin(void) { - /* Always run first iteration so any pending completion BHs run */ - bool waited = true; - BlockDriverState *bs; - BdrvNextIterator it; - GSList *aio_ctxs = NULL, *ctx; + BlockDriverState *bs = NULL; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + return; + } - /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread - * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on - * nodes in several different AioContexts, so make sure we're in the main - * context. */ + /* AIO_WAIT_WHILE() with a NULL context can only be called from the main + * loop AioContext, so make sure we're in the main context. */ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_drain_all_count < INT_MAX); + bdrv_drain_all_count++; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Quiesce all nodes, without polling in-flight requests yet. The graph + * cannot change during this loop. */ + while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); - /* Stop things in parent-to-child order */ aio_context_acquire(aio_context); - aio_disable_external(aio_context); - bdrv_parent_drained_begin(bs, NULL); - bdrv_drain_invoke(bs, true, true); + bdrv_do_drained_begin(bs, false, NULL, true, false); aio_context_release(aio_context); - - if (!g_slist_find(aio_ctxs, aio_context)) { - aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); - } } - /* Note that completion of an asynchronous I/O operation can trigger any - * number of other I/O operations on other devices---for example a - * coroutine can submit an I/O request to another device in response to - * request completion. Therefore we must keep looping until there was no - * more activity rather than simply draining each device independently. - */ - while (waited) { - waited = false; - - for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { - AioContext *aio_context = ctx->data; + /* Now poll the in-flight requests */ + AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); - aio_context_acquire(aio_context); - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - if (aio_context == bdrv_get_aio_context(bs)) { - waited |= bdrv_drain_recurse(bs); - } - } - aio_context_release(aio_context); - } + while ((bs = bdrv_next_all_states(bs))) { + bdrv_drain_assert_idle(bs); } - - g_slist_free(aio_ctxs); } void bdrv_drain_all_end(void) { - BlockDriverState *bs; - BdrvNextIterator it; + BlockDriverState *bs = NULL; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); - /* Re-enable things in child-to-parent order */ aio_context_acquire(aio_context); - bdrv_drain_invoke(bs, false, true); - bdrv_parent_drained_end(bs, NULL); - aio_enable_external(aio_context); + bdrv_do_drained_end(bs, false, NULL, true); aio_context_release(aio_context); } + + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; } void bdrv_drain_all(void) @@ -591,6 +678,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs) void bdrv_wakeup(BlockDriverState *bs) { aio_wait_kick(bdrv_get_aio_wait(bs)); + aio_wait_kick(&drain_all_aio_wait); } void bdrv_dec_in_flight(BlockDriverState *bs) diff --git a/block/mirror.c b/block/mirror.c index 435268b..61bd9f3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -13,6 +13,8 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/coroutine.h" +#include "qemu/range.h" #include "trace.h" #include "block/blockjob_int.h" #include "block/block_int.h" @@ -33,11 +35,12 @@ typedef struct MirrorBuffer { QSIMPLEQ_ENTRY(MirrorBuffer) next; } MirrorBuffer; +typedef struct MirrorOp MirrorOp; + typedef struct MirrorBlockJob { BlockJob common; BlockBackend *target; BlockDriverState *mirror_top_bs; - BlockDriverState *source; BlockDriverState *base; /* The name of the graph node to replace */ @@ -48,8 +51,12 @@ typedef struct MirrorBlockJob { Error *replace_blocker; bool is_none_mode; BlockMirrorBackingMode backing_mode; + MirrorCopyMode copy_mode; BlockdevOnError on_source_error, on_target_error; bool synced; + /* Set when the target is synced (dirty bitmap is clean, nothing + * in flight) and the job is running in active mode */ + bool actively_synced; bool should_complete; int64_t granularity; size_t buf_size; @@ -65,25 +72,47 @@ typedef struct MirrorBlockJob { unsigned long *in_flight_bitmap; int in_flight; int64_t bytes_in_flight; + QTAILQ_HEAD(MirrorOpList, MirrorOp) ops_in_flight; int ret; bool unmap; - bool waiting_for_io; int target_cluster_size; int max_iov; bool initial_zeroing_ongoing; + int in_active_write_counter; } MirrorBlockJob; -typedef struct MirrorOp { +typedef struct MirrorBDSOpaque { + MirrorBlockJob *job; +} MirrorBDSOpaque; + +struct MirrorOp { MirrorBlockJob *s; QEMUIOVector qiov; int64_t offset; uint64_t bytes; -} MirrorOp; + + /* The pointee is set by mirror_co_read(), mirror_co_zero(), and + * mirror_co_discard() before yielding for the first time */ + int64_t *bytes_handled; + + bool is_pseudo_op; + bool is_active_write; + CoQueue waiting_requests; + + QTAILQ_ENTRY(MirrorOp) next; +}; + +typedef enum MirrorMethod { + MIRROR_METHOD_COPY, + MIRROR_METHOD_ZERO, + MIRROR_METHOD_DISCARD, +} MirrorMethod; static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, int error) { s->synced = false; + s->actively_synced = false; if (read) { return block_job_error_action(&s->common, s->on_source_error, true, error); @@ -93,7 +122,42 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, } } -static void mirror_iteration_done(MirrorOp *op, int ret) +static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, + MirrorBlockJob *s, + uint64_t offset, + uint64_t bytes) +{ + uint64_t self_start_chunk = offset / s->granularity; + uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); + uint64_t self_nb_chunks = self_end_chunk - self_start_chunk; + + while (find_next_bit(s->in_flight_bitmap, self_end_chunk, + self_start_chunk) < self_end_chunk && + s->ret >= 0) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { + uint64_t op_start_chunk = op->offset / s->granularity; + uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes, + s->granularity) - + op_start_chunk; + + if (op == self) { + continue; + } + + if (ranges_overlap(self_start_chunk, self_nb_chunks, + op_start_chunk, op_nb_chunks)) + { + qemu_co_queue_wait(&op->waiting_requests, NULL); + break; + } + } + } +} + +static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) { MirrorBlockJob *s = op->s; struct iovec *iov; @@ -113,7 +177,9 @@ static void mirror_iteration_done(MirrorOp *op, int ret) chunk_num = op->offset / s->granularity; nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); + bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); + QTAILQ_REMOVE(&s->ops_in_flight, op, next); if (ret >= 0) { if (s->cow_bitmap) { bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); @@ -123,16 +189,13 @@ static void mirror_iteration_done(MirrorOp *op, int ret) } } qemu_iovec_destroy(&op->qiov); - g_free(op); - if (s->waiting_for_io) { - qemu_coroutine_enter(s->common.job.co); - } + qemu_co_queue_restart_all(&op->waiting_requests); + g_free(op); } -static void mirror_write_complete(void *opaque, int ret) +static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret) { - MirrorOp *op = opaque; MirrorBlockJob *s = op->s; aio_context_acquire(blk_get_aio_context(s->common.blk)); @@ -149,9 +212,8 @@ static void mirror_write_complete(void *opaque, int ret) aio_context_release(blk_get_aio_context(s->common.blk)); } -static void mirror_read_complete(void *opaque, int ret) +static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret) { - MirrorOp *op = opaque; MirrorBlockJob *s = op->s; aio_context_acquire(blk_get_aio_context(s->common.blk)); @@ -166,8 +228,9 @@ static void mirror_read_complete(void *opaque, int ret) mirror_iteration_done(op, ret); } else { - blk_aio_pwritev(s->target, op->offset, &op->qiov, - 0, mirror_write_complete, op); + ret = blk_co_pwritev(s->target, op->offset, + op->qiov.size, &op->qiov, 0); + mirror_write_complete(op, ret); } aio_context_release(blk_get_aio_context(s->common.blk)); } @@ -216,68 +279,80 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, return ret; } -static inline void mirror_wait_for_io(MirrorBlockJob *s) +static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) { - assert(!s->waiting_for_io); - s->waiting_for_io = true; - qemu_coroutine_yield(); - s->waiting_for_io = false; + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ + if (!op->is_pseudo_op && op->is_active_write == active) { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } + } + abort(); } -/* Submit async read while handling COW. - * Returns: The number of bytes copied after and including offset, - * excluding any bytes copied prior to offset due to alignment. - * This will be @bytes if no alignment is necessary, or - * (new_end - offset) if tail is rounded up or down due to - * alignment or buffer limit. +static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +{ + /* Only non-active operations use up in-flight slots */ + mirror_wait_for_any_operation(s, false); +} + +/* Perform a mirror copy operation. + * + * *op->bytes_handled is set to the number of bytes copied after and + * including offset, excluding any bytes copied prior to offset due + * to alignment. This will be op->bytes if no alignment is necessary, + * or (new_end - op->offset) if the tail is rounded up or down due to + * alignment or buffer limit. */ -static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset, - uint64_t bytes) +static void coroutine_fn mirror_co_read(void *opaque) { - BlockBackend *source = s->common.blk; + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; int nb_chunks; uint64_t ret; - MirrorOp *op; uint64_t max_bytes; max_bytes = s->granularity * s->max_iov; /* We can only handle as much as buf_size at a time. */ - bytes = MIN(s->buf_size, MIN(max_bytes, bytes)); - assert(bytes); - assert(bytes < BDRV_REQUEST_MAX_BYTES); - ret = bytes; + op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes)); + assert(op->bytes); + assert(op->bytes < BDRV_REQUEST_MAX_BYTES); + *op->bytes_handled = op->bytes; if (s->cow_bitmap) { - ret += mirror_cow_align(s, &offset, &bytes); + *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes); } - assert(bytes <= s->buf_size); + /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */ + assert(*op->bytes_handled <= UINT_MAX); + assert(op->bytes <= s->buf_size); /* The offset is granularity-aligned because: * 1) Caller passes in aligned values; * 2) mirror_cow_align is used only when target cluster is larger. */ - assert(QEMU_IS_ALIGNED(offset, s->granularity)); + assert(QEMU_IS_ALIGNED(op->offset, s->granularity)); /* The range is sector-aligned, since bdrv_getlength() rounds up. */ - assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - nb_chunks = DIV_ROUND_UP(bytes, s->granularity); + assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE)); + nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); - mirror_wait_for_io(s); + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); + mirror_wait_for_free_in_flight_slot(s); } - /* Allocate a MirrorOp that is used as an AIO callback. */ - op = g_new(MirrorOp, 1); - op->s = s; - op->offset = offset; - op->bytes = bytes; - /* Now make a QEMUIOVector taking enough granularity-sized chunks * from s->buf_free. */ qemu_iovec_init(&op->qiov, nb_chunks); while (nb_chunks-- > 0) { MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); - size_t remaining = bytes - op->qiov.size; + size_t remaining = op->bytes - op->qiov.size; QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); s->buf_free_count--; @@ -286,44 +361,92 @@ static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset, /* Copy the dirty cluster. */ s->in_flight++; - s->bytes_in_flight += bytes; - trace_mirror_one_iteration(s, offset, bytes); + s->bytes_in_flight += op->bytes; + trace_mirror_one_iteration(s, op->offset, op->bytes); - blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op); - return ret; + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, + &op->qiov, 0); + mirror_read_complete(op, ret); } -static void mirror_do_zero_or_discard(MirrorBlockJob *s, - int64_t offset, - uint64_t bytes, - bool is_discard) +static void coroutine_fn mirror_co_zero(void *opaque) { - MirrorOp *op; + MirrorOp *op = opaque; + int ret; - /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed - * so the freeing in mirror_iteration_done is nop. */ - op = g_new0(MirrorOp, 1); - op->s = s; - op->offset = offset; - op->bytes = bytes; + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; - s->in_flight++; - s->bytes_in_flight += bytes; - if (is_discard) { - blk_aio_pdiscard(s->target, offset, - op->bytes, mirror_write_complete, op); - } else { - blk_aio_pwrite_zeroes(s->target, offset, - op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0, - mirror_write_complete, op); + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + mirror_write_complete(op, ret); +} + +static void coroutine_fn mirror_co_discard(void *opaque) +{ + MirrorOp *op = opaque; + int ret; + + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +} + +static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + unsigned bytes, MirrorMethod mirror_method) +{ + MirrorOp *op; + Coroutine *co; + int64_t bytes_handled = -1; + + op = g_new(MirrorOp, 1); + *op = (MirrorOp){ + .s = s, + .offset = offset, + .bytes = bytes, + .bytes_handled = &bytes_handled, + }; + qemu_co_queue_init(&op->waiting_requests); + + switch (mirror_method) { + case MIRROR_METHOD_COPY: + co = qemu_coroutine_create(mirror_co_read, op); + break; + case MIRROR_METHOD_ZERO: + co = qemu_coroutine_create(mirror_co_zero, op); + break; + case MIRROR_METHOD_DISCARD: + co = qemu_coroutine_create(mirror_co_discard, op); + break; + default: + abort(); } + + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + qemu_coroutine_enter(co); + /* At this point, ownership of op has been moved to the coroutine + * and the object may already be freed */ + + /* Assert that this value has been set */ + assert(bytes_handled >= 0); + + /* Same assertion as in mirror_co_read() (and for mirror_co_read() + * and mirror_co_discard(), bytes_handled == op->bytes, which + * is the @bytes parameter given to this function) */ + assert(bytes_handled <= UINT_MAX); + return bytes_handled; } static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = s->source; - int64_t offset, first_chunk; - uint64_t delay_ns = 0; + BlockDriverState *source = s->mirror_top_bs->backing->bs; + MirrorOp *pseudo_op; + int64_t offset; + uint64_t delay_ns = 0, ret = 0; /* At least the first dirty chunk is mirrored in one iteration. */ int nb_chunks = 1; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); @@ -339,11 +462,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); - first_chunk = offset / s->granularity; - while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); - mirror_wait_for_io(s); - } + mirror_wait_on_conflicts(NULL, s, offset, 1); job_pause_point(&s->common.job); @@ -380,16 +499,27 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) nb_chunks * s->granularity); bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + /* Before claiming an area in the in-flight bitmap, we have to + * create a MirrorOp for it so that conflicting requests can wait + * for it. mirror_perform() will create the real MirrorOps later, + * for now we just create a pseudo operation that will wake up all + * conflicting requests once all real operations have been + * launched. */ + pseudo_op = g_new(MirrorOp, 1); + *pseudo_op = (MirrorOp){ + .offset = offset, + .bytes = nb_chunks * s->granularity, + .is_pseudo_op = true, + }; + qemu_co_queue_init(&pseudo_op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next); + bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); while (nb_chunks > 0 && offset < s->bdev_length) { int ret; int64_t io_bytes; int64_t io_bytes_acct; - enum MirrorMethod { - MIRROR_METHOD_COPY, - MIRROR_METHOD_ZERO, - MIRROR_METHOD_DISCARD - } mirror_method = MIRROR_METHOD_COPY; + MirrorMethod mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); ret = bdrv_block_status_above(source, NULL, offset, @@ -419,37 +549,34 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) while (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield_in_flight(s, offset, s->in_flight); - mirror_wait_for_io(s); + mirror_wait_for_free_in_flight_slot(s); } if (s->ret < 0) { - return 0; + ret = 0; + goto fail; } io_bytes = mirror_clip_bytes(s, offset, io_bytes); - switch (mirror_method) { - case MIRROR_METHOD_COPY: - io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes); - break; - case MIRROR_METHOD_ZERO: - case MIRROR_METHOD_DISCARD: - mirror_do_zero_or_discard(s, offset, io_bytes, - mirror_method == MIRROR_METHOD_DISCARD); - if (write_zeroes_ok) { - io_bytes_acct = 0; - } else { - io_bytes_acct = io_bytes; - } - break; - default: - abort(); + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); + if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { + io_bytes_acct = 0; + } else { + io_bytes_acct = io_bytes; } assert(io_bytes); offset += io_bytes; nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); } - return delay_ns; + + ret = delay_ns; +fail: + QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next); + qemu_co_queue_restart_all(&pseudo_op->waiting_requests); + g_free(pseudo_op); + + return ret; } static void mirror_free_init(MirrorBlockJob *s) @@ -476,7 +603,7 @@ static void mirror_free_init(MirrorBlockJob *s) static void mirror_wait_for_all_io(MirrorBlockJob *s) { while (s->in_flight > 0) { - mirror_wait_for_io(s); + mirror_wait_for_free_in_flight_slot(s); } } @@ -489,8 +616,9 @@ static void mirror_exit(Job *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); BlockJob *bjob = &s->common; MirrorExitData *data = opaque; + MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = s->source; + BlockDriverState *src = s->mirror_top_bs->backing->bs; BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *mirror_top_bs = s->mirror_top_bs; Error *local_err = NULL; @@ -581,6 +709,7 @@ static void mirror_exit(Job *job, void *opaque) blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); + bs_opaque->job = NULL; job_completed(job, data->ret, NULL); g_free(data); @@ -605,7 +734,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t offset; BlockDriverState *base = s->base; - BlockDriverState *bs = s->source; + BlockDriverState *bs = s->mirror_top_bs->backing->bs; BlockDriverState *target_bs = blk_bs(s->target); int ret; int64_t count; @@ -631,11 +760,11 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) if (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, s->in_flight); - mirror_wait_for_io(s); + mirror_wait_for_free_in_flight_slot(s); continue; } - mirror_do_zero_or_discard(s, offset, bytes, false); + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); offset += bytes; } @@ -687,7 +816,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = s->source; + BlockDriverState *bs = s->mirror_top_bs->backing->bs; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -730,6 +859,7 @@ static void coroutine_fn mirror_run(void *opaque) /* Transition to the READY state and wait for complete. */ job_transition_to_ready(&s->common.job); s->synced = true; + s->actively_synced = true; while (!job_is_cancelled(&s->common.job) && !s->should_complete) { job_yield(&s->common.job); } @@ -781,6 +911,12 @@ static void coroutine_fn mirror_run(void *opaque) int64_t cnt, delta; bool should_complete; + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { + mirror_wait_for_any_operation(s, true); + } + if (s->ret < 0) { ret = s->ret; goto immediate_exit; @@ -804,7 +940,7 @@ static void coroutine_fn mirror_run(void *opaque) if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); - mirror_wait_for_io(s); + mirror_wait_for_free_in_flight_slot(s); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); @@ -826,6 +962,9 @@ static void coroutine_fn mirror_run(void *opaque) */ job_transition_to_ready(&s->common.job); s->synced = true; + if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) { + s->actively_synced = true; + } } should_complete = s->should_complete || @@ -964,6 +1103,12 @@ static void mirror_pause(Job *job) mirror_wait_for_all_io(s); } +static bool mirror_drained_poll(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + return !!s->in_flight; +} + static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -997,6 +1142,7 @@ static const BlockJobDriver mirror_job_driver = { .pause = mirror_pause, .complete = mirror_complete, }, + .drained_poll = mirror_drained_poll, .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -1012,20 +1158,237 @@ static const BlockJobDriver commit_active_job_driver = { .pause = mirror_pause, .complete = mirror_complete, }, + .drained_poll = mirror_drained_poll, .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; +static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BdrvDirtyBitmapIter *iter; + QEMUIOVector target_qiov; + uint64_t dirty_offset; + int dirty_bytes; + + if (qiov) { + qemu_iovec_init(&target_qiov, qiov->niov); + } + + iter = bdrv_dirty_iter_new(job->dirty_bitmap); + bdrv_set_dirty_iter(iter, offset); + + while (true) { + bool valid_area; + int ret; + + bdrv_dirty_bitmap_lock(job->dirty_bitmap); + valid_area = bdrv_dirty_iter_next_area(iter, offset + bytes, + &dirty_offset, &dirty_bytes); + if (!valid_area) { + bdrv_dirty_bitmap_unlock(job->dirty_bitmap); + break; + } + + bdrv_reset_dirty_bitmap_locked(job->dirty_bitmap, + dirty_offset, dirty_bytes); + bdrv_dirty_bitmap_unlock(job->dirty_bitmap); + + job_progress_increase_remaining(&job->common.job, dirty_bytes); + + assert(dirty_offset - offset <= SIZE_MAX); + if (qiov) { + qemu_iovec_reset(&target_qiov); + qemu_iovec_concat(&target_qiov, qiov, + dirty_offset - offset, dirty_bytes); + } + + switch (method) { + case MIRROR_METHOD_COPY: + ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes, + qiov ? &target_qiov : NULL, flags); + break; + + case MIRROR_METHOD_ZERO: + assert(!qiov); + ret = blk_co_pwrite_zeroes(job->target, dirty_offset, dirty_bytes, + flags); + break; + + case MIRROR_METHOD_DISCARD: + assert(!qiov); + ret = blk_co_pdiscard(job->target, dirty_offset, dirty_bytes); + break; + + default: + abort(); + } + + if (ret >= 0) { + job_progress_update(&job->common.job, dirty_bytes); + } else { + BlockErrorAction action; + + bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_offset, dirty_bytes); + job->actively_synced = false; + + action = mirror_error_action(job, false, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + if (!job->ret) { + job->ret = ret; + } + break; + } + } + } + + bdrv_dirty_iter_free(iter); + if (qiov) { + qemu_iovec_destroy(&target_qiov); + } +} + +static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + uint64_t offset, + uint64_t bytes) +{ + MirrorOp *op; + uint64_t start_chunk = offset / s->granularity; + uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); + + op = g_new(MirrorOp, 1); + *op = (MirrorOp){ + .s = s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + + s->in_active_write_counter++; + + mirror_wait_on_conflicts(op, s, offset, bytes); + + bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); + + return op; +} + +static void coroutine_fn active_write_settle(MirrorOp *op) +{ + uint64_t start_chunk = op->offset / op->s->granularity; + uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes, + op->s->granularity); + + if (!--op->s->in_active_write_counter && op->s->actively_synced) { + BdrvChild *source = op->s->mirror_top_bs->backing; + + if (QLIST_FIRST(&source->bs->parents) == source && + QLIST_NEXT(source, next_parent) == NULL) + { + /* Assert that we are back in sync once all active write + * operations are settled. + * Note that we can only assert this if the mirror node + * is the source node's only parent. */ + assert(!bdrv_get_dirty_count(op->s->dirty_bitmap)); + } + } + bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); + QTAILQ_REMOVE(&op->s->ops_in_flight, op, next); + qemu_co_queue_restart_all(&op->waiting_requests); + g_free(op); +} + static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } +static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, + MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + MirrorOp *op = NULL; + MirrorBDSOpaque *s = bs->opaque; + int ret = 0; + bool copy_to_target; + + copy_to_target = s->job->ret >= 0 && + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; + + if (copy_to_target) { + op = active_write_prepare(s->job, offset, bytes); + } + + switch (method) { + case MIRROR_METHOD_COPY: + ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); + break; + + case MIRROR_METHOD_ZERO: + ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); + break; + + case MIRROR_METHOD_DISCARD: + ret = bdrv_co_pdiscard(bs->backing->bs, offset, bytes); + break; + + default: + abort(); + } + + if (ret < 0) { + goto out; + } + + if (copy_to_target) { + do_sync_target_write(s->job, method, offset, bytes, qiov, flags); + } + +out: + if (copy_to_target) { + active_write_settle(op); + } + return ret; +} + static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); + MirrorBDSOpaque *s = bs->opaque; + QEMUIOVector bounce_qiov; + void *bounce_buf; + int ret = 0; + bool copy_to_target; + + copy_to_target = s->job->ret >= 0 && + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; + + if (copy_to_target) { + /* The guest might concurrently modify the data to write; but + * the data on source and destination must match, so we have + * to use a bounce buffer if we are going to write to the + * target now. */ + bounce_buf = qemu_blockalign(bs, bytes); + iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes); + + qemu_iovec_init(&bounce_qiov, 1); + qemu_iovec_add(&bounce_qiov, bounce_buf, bytes); + qiov = &bounce_qiov; + } + + ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov, + flags); + + if (copy_to_target) { + qemu_iovec_destroy(&bounce_qiov); + qemu_vfree(bounce_buf); + } + + return ret; } static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) @@ -1040,13 +1403,15 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { - return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL, + flags); } static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { - return bdrv_co_pdiscard(bs->backing->bs, offset, bytes); + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, + NULL, 0); } static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) @@ -1108,10 +1473,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, bool auto_complete, const char *filter_node_name, - bool is_mirror, + bool is_mirror, MirrorCopyMode copy_mode, Error **errp) { MirrorBlockJob *s; + MirrorBDSOpaque *bs_opaque; BlockDriverState *mirror_top_bs; bool target_graph_mod; bool target_is_backing; @@ -1147,6 +1513,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, mirror_top_bs->total_sectors = bs->total_sectors; mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + bs_opaque = g_new0(MirrorBDSOpaque, 1); + mirror_top_bs->opaque = bs_opaque; bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep @@ -1171,10 +1539,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, if (!s) { goto fail; } + bs_opaque->job = s; + /* The block job now has a reference to this node */ bdrv_unref(mirror_top_bs); - s->source = bs; s->mirror_top_bs = mirror_top_bs; /* No resize for the target either; while the mirror is still running, a @@ -1212,6 +1581,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, s->on_target_error = on_target_error; s->is_none_mode = is_none_mode; s->backing_mode = backing_mode; + s->copy_mode = copy_mode; s->base = base; s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); @@ -1247,6 +1617,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, } } + QTAILQ_INIT(&s->ops_in_flight); + trace_mirror_start(bs, s, opaque); job_start(&s->common.job); return; @@ -1259,6 +1631,7 @@ fail: g_free(s->replaces); blk_unref(s->target); + bs_opaque->job = NULL; job_early_fail(&s->common.job); } @@ -1275,7 +1648,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, Error **errp) + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1290,7 +1664,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, &mirror_job_driver, is_none_mode, base, false, - filter_node_name, true, errp); + filter_node_name, true, copy_mode, errp); } void commit_active_start(const char *job_id, BlockDriverState *bs, @@ -1313,7 +1687,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &commit_active_job_driver, false, base, auto_complete, - filter_node_name, false, &local_err); + filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, + &local_err); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/vvfat.c b/block/vvfat.c index 4595f33..c7d2ed2 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3134,6 +3134,7 @@ static void vvfat_qcow_options(int *child_flags, QDict *child_options, } static const BdrvChildRole child_vvfat_qcow = { + .parent_is_bds = true, .inherit_options = vvfat_qcow_options, }; @@ -3586,6 +3586,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_unmap, bool unmap, bool has_filter_node_name, const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, Error **errp) { @@ -3610,6 +3611,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_filter_node_name) { filter_node_name = NULL; } + if (!has_copy_mode) { + copy_mode = MIRROR_COPY_MODE_BACKGROUND; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3640,7 +3644,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, on_source_error, on_target_error, unmap, filter_node_name, - errp); + copy_mode, errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3786,6 +3790,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, false, NULL, + arg->has_copy_mode, arg->copy_mode, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3806,6 +3811,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_target_error, bool has_filter_node_name, const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, Error **errp) { BlockDriverState *bs; @@ -3838,6 +3844,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_target_error, on_target_error, true, true, has_filter_node_name, filter_node_name, + has_copy_mode, copy_mode, &local_err); error_propagate(errp, local_err); @@ -155,6 +155,28 @@ static void child_job_drained_begin(BdrvChild *c) job_pause(&job->job); } +static bool child_job_drained_poll(BdrvChild *c) +{ + BlockJob *bjob = c->opaque; + Job *job = &bjob->job; + const BlockJobDriver *drv = block_job_driver(bjob); + + /* An inactive or completed job doesn't have any pending requests. Jobs + * with !job->busy are either already paused or have a pause point after + * being reentered, so no job driver code will run before they pause. */ + if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) { + return false; + } + + /* Otherwise, assume that it isn't fully stopped yet, but allow the job to + * override this assumption. */ + if (drv->drained_poll) { + return drv->drained_poll(bjob); + } else { + return true; + } +} + static void child_job_drained_end(BdrvChild *c) { BlockJob *job = c->opaque; @@ -164,6 +186,7 @@ static void child_job_drained_end(BdrvChild *c) static const BdrvChildRole child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, + .drained_poll = child_job_drained_poll, .drained_end = child_job_drained_end, .stay_at_node = true, }; @@ -4599,6 +4599,7 @@ int main(void) { virgl_renderer_poll(); return 0; } EOF virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null) virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null) + virgl_version=$($pkg_config --modversion virglrenderer 2>/dev/null) if $pkg_config virglrenderer >/dev/null 2>&1 && \ compile_prog "$virgl_cflags" "$virgl_libs" ; then virglrenderer="yes" @@ -5827,7 +5828,7 @@ echo "nettle $nettle $(echo_version $nettle $nettle_version)" echo "nettle kdf $nettle_kdf" echo "libtasn1 $tasn1" echo "curses support $curses" -echo "virgl support $virglrenderer" +echo "virgl support $virglrenderer $(echo_version $virglrenderer $virgl_version)" echo "curl support $curl" echo "mingw32 support $mingw32" echo "Audio drivers $audio_drv_list" @@ -7239,9 +7240,11 @@ for rom in seabios vgabios ; do done # set up tests data directory -if [ ! -e tests/data ]; then - symlink "$source_path/tests/data" tests/data -fi +for tests_subdir in acceptance data; do + if [ ! -e tests/$tests_subdir ]; then + symlink "$source_path/tests/$tests_subdir" tests/$tests_subdir + fi +done # set up qemu-iotests in this build directory iotests_common_env="tests/qemu-iotests/common.env" diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak index 4d7be45..abeeb04 100644 --- a/default-configs/ppc-softmmu.mak +++ b/default-configs/ppc-softmmu.mak @@ -31,12 +31,14 @@ CONFIG_I2C=y CONFIG_MAC=y CONFIG_ESCC=y CONFIG_MACIO=y +CONFIG_MACIO_GPIO=y CONFIG_SUNGEM=y CONFIG_MOS6522=y CONFIG_CUDA=y CONFIG_ADB=y CONFIG_MAC_NVRAM=y CONFIG_MAC_DBDMA=y +CONFIG_MAC_PMU=y CONFIG_HEATHROW_PIC=y CONFIG_GRACKLE_PCI=y CONFIG_UNIN_PCI=y diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst index 0ca1a2d..f33e5a8 100644 --- a/docs/devel/testing.rst +++ b/docs/devel/testing.rst @@ -484,3 +484,195 @@ supported. To start the fuzzer, run Alternatively, some command different from "qemu-img info" can be tested, by changing the ``-c`` option. + +Acceptance tests using the Avocado Framework +============================================ + +The ``tests/acceptance`` directory hosts functional tests, also known +as acceptance level tests. They're usually higher level tests, and +may interact with external resources and with various guest operating +systems. + +These tests are written using the Avocado Testing Framework (which must +be installed separately) in conjunction with a the ``avocado_qemu.Test`` +class, implemented at ``tests/acceptance/avocado_qemu``. + +Tests based on ``avocado_qemu.Test`` can easily: + + * Customize the command line arguments given to the convenience + ``self.vm`` attribute (a QEMUMachine instance) + + * Interact with the QEMU monitor, send QMP commands and check + their results + + * Interact with the guest OS, using the convenience console device + (which may be useful to assert the effectiveness and correctness of + command line arguments or QMP commands) + + * Interact with external data files that accompany the test itself + (see ``self.get_data()``) + + * Download (and cache) remote data files, such as firmware and kernel + images + + * Have access to a library of guest OS images (by means of the + ``avocado.utils.vmimage`` library) + + * Make use of various other test related utilities available at the + test class itself and at the utility library: + + - http://avocado-framework.readthedocs.io/en/latest/api/test/avocado.html#avocado.Test + - http://avocado-framework.readthedocs.io/en/latest/api/utils/avocado.utils.html + +Installation +------------ + +To install Avocado and its dependencies, run: + +.. code:: + + pip install --user avocado-framework + +Alternatively, follow the instructions on this link: + + http://avocado-framework.readthedocs.io/en/latest/GetStartedGuide.html#installing-avocado + +Overview +-------- + +This directory provides the ``avocado_qemu`` Python module, containing +the ``avocado_qemu.Test`` class. Here's a simple usage example: + +.. code:: + + from avocado_qemu import Test + + + class Version(Test): + """ + :avocado: enable + :avocado: tags=quick + """ + def test_qmp_human_info_version(self): + self.vm.launch() + res = self.vm.command('human-monitor-command', + command_line='info version') + self.assertRegexpMatches(res, r'^(\d+\.\d+\.\d)') + +To execute your test, run: + +.. code:: + + avocado run version.py + +Tests may be classified according to a convention by using docstring +directives such as ``:avocado: tags=TAG1,TAG2``. To run all tests +in the current directory, tagged as "quick", run: + +.. code:: + + avocado run -t quick . + +The ``avocado_qemu.Test`` base test class +----------------------------------------- + +The ``avocado_qemu.Test`` class has a number of characteristics that +are worth being mentioned right away. + +First of all, it attempts to give each test a ready to use QEMUMachine +instance, available at ``self.vm``. Because many tests will tweak the +QEMU command line, launching the QEMUMachine (by using ``self.vm.launch()``) +is left to the test writer. + +At test "tear down", ``avocado_qemu.Test`` handles the QEMUMachine +shutdown. + +QEMUMachine +~~~~~~~~~~~ + +The QEMUMachine API is already widely used in the Python iotests, +device-crash-test and other Python scripts. It's a wrapper around the +execution of a QEMU binary, giving its users: + + * the ability to set command line arguments to be given to the QEMU + binary + + * a ready to use QMP connection and interface, which can be used to + send commands and inspect its results, as well as asynchronous + events + + * convenience methods to set commonly used command line arguments in + a more succinct and intuitive way + +QEMU binary selection +~~~~~~~~~~~~~~~~~~~~~ + +The QEMU binary used for the ``self.vm`` QEMUMachine instance will +primarily depend on the value of the ``qemu_bin`` parameter. If it's +not explicitly set, its default value will be the result of a dynamic +probe in the same source tree. A suitable binary will be one that +targets the architecture matching host machine. + +Based on this description, test writers will usually rely on one of +the following approaches: + +1) Set ``qemu_bin``, and use the given binary + +2) Do not set ``qemu_bin``, and use a QEMU binary named like + "${arch}-softmmu/qemu-system-${arch}", either in the current + working directory, or in the current source tree. + +The resulting ``qemu_bin`` value will be preserved in the +``avocado_qemu.Test`` as an attribute with the same name. + +Attribute reference +------------------- + +Besides the attributes and methods that are part of the base +``avocado.Test`` class, the following attributes are available on any +``avocado_qemu.Test`` instance. + +vm +~~ + +A QEMUMachine instance, initially configured according to the given +``qemu_bin`` parameter. + +qemu_bin +~~~~~~~~ + +The preserved value of the ``qemu_bin`` parameter or the result of the +dynamic probe for a QEMU binary in the current working directory or +source tree. + +Parameter reference +------------------- + +To understand how Avocado parameters are accessed by tests, and how +they can be passed to tests, please refer to:: + + http://avocado-framework.readthedocs.io/en/latest/WritingTests.html#accessing-test-parameters + +Parameter values can be easily seen in the log files, and will look +like the following: + +.. code:: + + PARAMS (key=qemu_bin, path=*, default=x86_64-softmmu/qemu-system-x86_64) => 'x86_64-softmmu/qemu-system-x86_64 + +qemu_bin +~~~~~~~~ + +The exact QEMU binary to be used on QEMUMachine. + +Uninstalling Avocado +-------------------- + +If you've followed the installation instructions above, you can easily +uninstall Avocado. Start by listing the packages you have installed:: + + pip list --user + +And remove any package you want with:: + + pip uninstall <package_name> diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index e4c492e..277ed87 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -36,6 +36,7 @@ #include "hw/vfio/vfio-platform.h" #include "hw/vfio/vfio-calxeda-xgmac.h" #include "hw/vfio/vfio-amd-xgbe.h" +#include "hw/display/ramfb.h" #include "hw/arm/fdt.h" /* @@ -406,12 +407,18 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) #endif /* CONFIG_LINUX */ +static int no_fdt_node(SysBusDevice *sbdev, void *opaque) +{ + return 0; +} + /* list of supported dynamic sysbus devices */ static const NodeCreationPair add_fdt_node_functions[] = { #ifdef CONFIG_LINUX {TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node}, {TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node}, #endif + {TYPE_RAMFB_DEVICE, no_fdt_node}, {"", NULL}, /* last element */ }; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index f0a4fa0..98b99cf 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -36,6 +36,7 @@ #include "hw/arm/virt.h" #include "hw/vfio/vfio-calxeda-xgmac.h" #include "hw/vfio/vfio-amd-xgbe.h" +#include "hw/display/ramfb.h" #include "hw/devices.h" #include "net/net.h" #include "sysemu/device_tree.h" @@ -1659,6 +1660,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->max_cpus = 255; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->pci_allow_0_address = true; diff --git a/hw/core/bus.c b/hw/core/bus.c index ad0c9df..4651f24 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -102,6 +102,7 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name) QLIST_INSERT_HEAD(&bus->parent->child_bus, bus, sibling); bus->parent->num_child_bus++; object_property_add_child(OBJECT(bus->parent), bus->name, OBJECT(bus), NULL); + object_unref(OBJECT(bus)); } else if (bus != sysbus_get_default()) { /* TODO: once all bus devices are qdevified, only reset handler for main_system_bus should be registered here. */ diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs index b5d97ab..fb8408c 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,3 +1,6 @@ +common-obj-y += ramfb.o +common-obj-y += ramfb-standalone.o + common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o common-obj-$(CONFIG_G364FB) += g364fb.o diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c new file mode 100644 index 0000000..c0d241b --- /dev/null +++ b/hw/display/ramfb-standalone.c @@ -0,0 +1,62 @@ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/loader.h" +#include "hw/isa/isa.h" +#include "hw/display/ramfb.h" +#include "ui/console.h" +#include "sysemu/sysemu.h" + +#define RAMFB(obj) OBJECT_CHECK(RAMFBStandaloneState, (obj), TYPE_RAMFB_DEVICE) + +typedef struct RAMFBStandaloneState { + SysBusDevice parent_obj; + QemuConsole *con; + RAMFBState *state; +} RAMFBStandaloneState; + +static void display_update_wrapper(void *dev) +{ + RAMFBStandaloneState *ramfb = RAMFB(dev); + + if (0 /* native driver active */) { + /* non-standalone device would run native display update here */; + } else { + ramfb_display_update(ramfb->con, ramfb->state); + } +} + +static const GraphicHwOps wrapper_ops = { + .gfx_update = display_update_wrapper, +}; + +static void ramfb_realizefn(DeviceState *dev, Error **errp) +{ + RAMFBStandaloneState *ramfb = RAMFB(dev); + + ramfb->con = graphic_console_init(dev, 0, &wrapper_ops, dev); + ramfb->state = ramfb_setup(errp); +} + +static void ramfb_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); + dc->realize = ramfb_realizefn; + dc->desc = "ram framebuffer standalone device"; + dc->user_creatable = true; +} + +static const TypeInfo ramfb_info = { + .name = TYPE_RAMFB_DEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RAMFBStandaloneState), + .class_init = ramfb_class_initfn, +}; + +static void ramfb_register_types(void) +{ + type_register_static(&ramfb_info); +} + +type_init(ramfb_register_types) diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c new file mode 100644 index 0000000..6867bce --- /dev/null +++ b/hw/display/ramfb.c @@ -0,0 +1,95 @@ +/* + * early boot framebuffer in guest ram + * configured using fw_cfg + * + * Copyright Red Hat, Inc. 2017 + * + * Author: + * Gerd Hoffmann <kraxel@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/loader.h" +#include "hw/display/ramfb.h" +#include "ui/console.h" +#include "sysemu/sysemu.h" + +struct QEMU_PACKED RAMFBCfg { + uint64_t addr; + uint32_t fourcc; + uint32_t flags; + uint32_t width; + uint32_t height; + uint32_t stride; +}; + +struct RAMFBState { + DisplaySurface *ds; + uint32_t width, height; + struct RAMFBCfg cfg; +}; + +static void ramfb_fw_cfg_write(void *dev, off_t offset, size_t len) +{ + RAMFBState *s = dev; + void *framebuffer; + uint32_t stride, fourcc, format; + hwaddr addr, length; + + s->width = be32_to_cpu(s->cfg.width); + s->height = be32_to_cpu(s->cfg.height); + stride = be32_to_cpu(s->cfg.stride); + fourcc = be32_to_cpu(s->cfg.fourcc); + addr = be64_to_cpu(s->cfg.addr); + length = stride * s->height; + format = qemu_drm_format_to_pixman(fourcc); + + fprintf(stderr, "%s: %dx%d @ 0x%" PRIx64 "\n", __func__, + s->width, s->height, addr); + framebuffer = address_space_map(&address_space_memory, + addr, &length, false, + MEMTXATTRS_UNSPECIFIED); + if (!framebuffer || length < stride * s->height) { + s->width = 0; + s->height = 0; + return; + } + s->ds = qemu_create_displaysurface_from(s->width, s->height, + format, stride, framebuffer); +} + +void ramfb_display_update(QemuConsole *con, RAMFBState *s) +{ + if (!s->width || !s->height) { + return; + } + + if (s->ds) { + dpy_gfx_replace_surface(con, s->ds); + s->ds = NULL; + } + + /* simple full screen update */ + dpy_gfx_update_full(con); +} + +RAMFBState *ramfb_setup(Error **errp) +{ + FWCfgState *fw_cfg = fw_cfg_find(); + RAMFBState *s; + + if (!fw_cfg || !fw_cfg->dma_enabled) { + error_setg(errp, "ramfb device requires fw_cfg with DMA"); + return NULL; + } + + s = g_new0(RAMFBState, 1); + + fw_cfg_add_file_callback(fw_cfg, "etc/ramfb", + NULL, ramfb_fw_cfg_write, s, + &s->cfg, sizeof(s->cfg), false); + return s; +} diff --git a/hw/display/sm501.c b/hw/display/sm501.c index e47be99..ca0840f 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -836,27 +836,30 @@ static void sm501_system_config_write(void *opaque, hwaddr addr, switch (addr) { case SM501_SYSTEM_CONTROL: - s->system_control = value & 0xE300B8F7; + s->system_control &= 0x10DB0000; + s->system_control |= value & 0xEF00B8F7; break; case SM501_MISC_CONTROL: - s->misc_control = value & 0xFF7FFF20; + s->misc_control &= 0xEF; + s->misc_control |= value & 0xFF7FFF10; break; case SM501_GPIO31_0_CONTROL: s->gpio_31_0_control = value; break; case SM501_GPIO63_32_CONTROL: - s->gpio_63_32_control = value; + s->gpio_63_32_control = value & 0xFF80FFFF; break; case SM501_DRAM_CONTROL: s->local_mem_size_index = (value >> 13) & 0x7; /* TODO : check validity of size change */ - s->dram_control |= value & 0x7FFFFFC3; + s->dram_control &= 0x80000000; + s->dram_control |= value & 0x7FFFFFC3; break; case SM501_ARBTRTN_CONTROL: - s->arbitration_control = value & 0x37777777; + s->arbitration_control = value & 0x37777777; break; case SM501_IRQ_MASK: - s->irq_mask = value; + s->irq_mask = value & 0xFFDF3F5F; break; case SM501_MISC_TIMING: s->misc_timing = value & 0xF31F1FFF; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3b87f3c..e9b6f06 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -28,6 +28,7 @@ #include "hw/loader.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" +#include "hw/display/ramfb.h" #include "hw/smbios/smbios.h" #include "hw/pci/pci.h" #include "hw/pci/pci_ids.h" @@ -423,6 +424,7 @@ static void pc_i440fx_machine_options(MachineClass *m) m->desc = "Standard PC (i440FX + PIIX, 1996)"; m->default_machine_opts = "firmware=bios-256k.bin"; m->default_display = "std"; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); } static void pc_i440fx_3_0_machine_options(MachineClass *m) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 087f263..1a73e18 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -45,6 +45,7 @@ #include "hw/i386/ich9.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/display/ramfb.h" #include "hw/smbios/smbios.h" #include "hw/ide/pci.h" #include "hw/ide/ahci.h" @@ -305,6 +306,7 @@ static void pc_q35_machine_options(MachineClass *m) m->no_floppy = 1; machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); m->max_cpus = 288; } diff --git a/hw/input/adb-kbd.c b/hw/input/adb-kbd.c index 50b6271..b026e9d 100644 --- a/hw/input/adb-kbd.c +++ b/hw/input/adb-kbd.c @@ -261,18 +261,21 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf, trace_adb_kbd_request_change_addr(d->devaddr); break; default: - d->devaddr = buf[1] & 0xf; - /* we support handlers: - * 1: Apple Standard Keyboard - * 2: Apple Extended Keyboard (LShift = RShift) - * 3: Apple Extended Keyboard (LShift != RShift) - */ - if (buf[2] == 1 || buf[2] == 2 || buf[2] == 3) { - d->handler = buf[2]; + if (!d->disable_direct_reg3_writes) { + d->devaddr = buf[1] & 0xf; + + /* we support handlers: + * 1: Apple Standard Keyboard + * 2: Apple Extended Keyboard (LShift = RShift) + * 3: Apple Extended Keyboard (LShift != RShift) + */ + if (buf[2] == 1 || buf[2] == 2 || buf[2] == 3) { + d->handler = buf[2]; + } + + trace_adb_kbd_request_change_addr_and_handler(d->devaddr, + d->handler); } - - trace_adb_kbd_request_change_addr_and_handler(d->devaddr, - d->handler); break; } } @@ -290,8 +293,8 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf, olen = 2; break; case 3: - obuf[0] = d->handler; - obuf[1] = d->devaddr; + obuf[0] = d->devaddr; + obuf[1] = d->handler; olen = 2; break; } diff --git a/hw/input/adb-mouse.c b/hw/input/adb-mouse.c index 3ba6027..83833b0 100644 --- a/hw/input/adb-mouse.c +++ b/hw/input/adb-mouse.c @@ -142,24 +142,27 @@ static int adb_mouse_request(ADBDevice *d, uint8_t *obuf, trace_adb_mouse_request_change_addr(d->devaddr); break; default: - d->devaddr = buf[1] & 0xf; - /* we support handlers: - * 0x01: Classic Apple Mouse Protocol / 100 cpi operations - * 0x02: Classic Apple Mouse Protocol / 200 cpi operations - * we don't support handlers (at least): - * 0x03: Mouse systems A3 trackball - * 0x04: Extended Apple Mouse Protocol - * 0x2f: Microspeed mouse - * 0x42: Macally - * 0x5f: Microspeed mouse - * 0x66: Microspeed mouse - */ - if (buf[2] == 1 || buf[2] == 2) { - d->handler = buf[2]; + if (!d->disable_direct_reg3_writes) { + d->devaddr = buf[1] & 0xf; + + /* we support handlers: + * 0x01: Classic Apple Mouse Protocol / 100 cpi operations + * 0x02: Classic Apple Mouse Protocol / 200 cpi operations + * we don't support handlers (at least): + * 0x03: Mouse systems A3 trackball + * 0x04: Extended Apple Mouse Protocol + * 0x2f: Microspeed mouse + * 0x42: Macally + * 0x5f: Microspeed mouse + * 0x66: Microspeed mouse + */ + if (buf[2] == 1 || buf[2] == 2) { + d->handler = buf[2]; + } + + trace_adb_mouse_request_change_addr_and_handler( + d->devaddr, d->handler); } - - trace_adb_mouse_request_change_addr_and_handler(d->devaddr, - d->handler); break; } } @@ -172,8 +175,8 @@ static int adb_mouse_request(ADBDevice *d, uint8_t *obuf, case 1: break; case 3: - obuf[0] = d->handler; - obuf[1] = d->devaddr; + obuf[0] = d->devaddr; + obuf[1] = d->handler; olen = 2; break; } diff --git a/hw/input/adb.c b/hw/input/adb.c index 23ae6f0..bbb40ae 100644 --- a/hw/input/adb.c +++ b/hw/input/adb.c @@ -113,11 +113,18 @@ static void adb_device_realizefn(DeviceState *dev, Error **errp) bus->devices[bus->nb_devices++] = d; } +static Property adb_device_properties[] = { + DEFINE_PROP_BOOL("disable-direct-reg3-writes", ADBDevice, + disable_direct_reg3_writes, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void adb_device_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = adb_device_realizefn; + dc->props = adb_device_properties; dc->bus_type = TYPE_ADB_BUS; } diff --git a/hw/input/ps2.c b/hw/input/ps2.c index eeec618..fdfcadf 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -927,7 +927,7 @@ static void ps2_common_post_load(PS2State *s) /* reset rptr/wptr/count */ q->rptr = 0; - q->wptr = size; + q->wptr = (size == PS2_QUEUE_SIZE) ? 0 : size; q->count = size; s->update_irq(s->update_arg, q->count != 0); } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 8bdf6af..8dba2f8 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -186,8 +186,7 @@ static void ics_get_kvm_state(ICSState *ics) kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES, i + ics->offset, &state, false, &local_err); if (local_err) { - error_report("Unable to retrieve KVM interrupt controller state" - " for IRQ %d: %s", i + ics->offset, strerror(errno)); + error_report_err(local_err); exit(1); } @@ -273,11 +272,10 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) state |= KVM_XICS_QUEUED; } - kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES, - i + ics->offset, &state, true, &local_err); + ret = kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES, + i + ics->offset, &state, true, &local_err); if (local_err) { - error_report("Unable to restore KVM interrupt controller state" - " for IRQs %d: %s", i + ics->offset, strerror(errno)); + error_report_err(local_err); return ret; } } diff --git a/hw/isa/smc37c669-superio.c b/hw/isa/smc37c669-superio.c index aa233c6..64466a9 100644 --- a/hw/isa/smc37c669-superio.c +++ b/hw/isa/smc37c669-superio.c @@ -37,7 +37,7 @@ static bool is_parallel_enabled(ISASuperIODevice *sio, uint8_t index) static uint16_t get_parallel_iobase(ISASuperIODevice *sio, uint8_t index) { - return 0x3bc; + return 0x378; } static unsigned int get_parallel_irq(ISASuperIODevice *sio, uint8_t index) diff --git a/hw/misc/macio/Makefile.objs b/hw/misc/macio/Makefile.objs index ef7ac24..07fdb32 100644 --- a/hw/misc/macio/Makefile.objs +++ b/hw/misc/macio/Makefile.objs @@ -1,3 +1,5 @@ common-obj-y += macio.o common-obj-$(CONFIG_CUDA) += cuda.o +common-obj-$(CONFIG_MAC_PMU) += pmu.o common-obj-$(CONFIG_MAC_DBDMA) += mac_dbdma.o +common-obj-$(CONFIG_MACIO_GPIO) += gpio.o diff --git a/hw/misc/macio/gpio.c b/hw/misc/macio/gpio.c new file mode 100644 index 0000000..9317df7 --- /dev/null +++ b/hw/misc/macio/gpio.c @@ -0,0 +1,231 @@ +/* + * PowerMac NewWorld MacIO GPIO emulation + * + * Copyright (c) 2016 Benjamin Herrenschmidt + * Copyright (c) 2018 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/ppc/mac.h" +#include "hw/misc/macio/macio.h" +#include "hw/misc/macio/gpio.h" +#include "hw/nmi.h" +#include "qemu/log.h" +#include "trace.h" + + +void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state) +{ + uint8_t new_reg; + + trace_macio_set_gpio(gpio, state); + + if (s->gpio_regs[gpio] & 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "GPIO: Setting GPIO %d while it's an output\n", gpio); + } + + new_reg = s->gpio_regs[gpio] & ~2; + if (state) { + new_reg |= 2; + } + + if (new_reg == s->gpio_regs[gpio]) { + return; + } + + s->gpio_regs[gpio] = new_reg; + + /* This is will work until we fix the binding between MacIO and + * the MPIC properly so we can route all GPIOs and avoid going + * via the top level platform code. + * + * Note that we probably need to get access to the MPIC config to + * decode polarity since qemu always use "raise" regardless. + * + * For now, we hard wire known GPIOs + */ + + switch (gpio) { + case 1: + /* Level low */ + if (!state) { + trace_macio_gpio_irq_assert(gpio); + qemu_irq_raise(s->gpio_extirqs[gpio]); + } else { + trace_macio_gpio_irq_deassert(gpio); + qemu_irq_lower(s->gpio_extirqs[gpio]); + } + break; + + case 9: + /* Edge, triggered by NMI below */ + if (state) { + trace_macio_gpio_irq_assert(gpio); + qemu_irq_raise(s->gpio_extirqs[gpio]); + } else { + trace_macio_gpio_irq_deassert(gpio); + qemu_irq_lower(s->gpio_extirqs[gpio]); + } + break; + + default: + qemu_log_mask(LOG_UNIMP, "GPIO: setting unimplemented GPIO %d", gpio); + } +} + +static void macio_gpio_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + MacIOGPIOState *s = opaque; + uint8_t ibit; + + trace_macio_gpio_write(addr, value); + + /* Levels regs are read-only */ + if (addr < 8) { + return; + } + + addr -= 8; + if (addr < 36) { + value &= ~2; + + if (value & 4) { + ibit = (value & 1) << 1; + } else { + ibit = s->gpio_regs[addr] & 2; + } + + s->gpio_regs[addr] = value | ibit; + } +} + +static uint64_t macio_gpio_read(void *opaque, hwaddr addr, unsigned size) +{ + MacIOGPIOState *s = opaque; + uint64_t val = 0; + + /* Levels regs */ + if (addr < 8) { + val = s->gpio_levels[addr]; + } else { + addr -= 8; + + if (addr < 36) { + val = s->gpio_regs[addr]; + } + } + + trace_macio_gpio_write(addr, val); + return val; +} + +static const MemoryRegionOps macio_gpio_ops = { + .read = macio_gpio_read, + .write = macio_gpio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void macio_gpio_realize(DeviceState *dev, Error **errp) +{ + MacIOGPIOState *s = MACIO_GPIO(dev); + + s->gpio_extirqs[1] = qdev_get_gpio_in(DEVICE(s->pic), + NEWWORLD_EXTING_GPIO1); + s->gpio_extirqs[9] = qdev_get_gpio_in(DEVICE(s->pic), + NEWWORLD_EXTING_GPIO9); +} + +static void macio_gpio_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + MacIOGPIOState *s = MACIO_GPIO(obj); + + object_property_add_link(obj, "pic", TYPE_OPENPIC, + (Object **) &s->pic, + qdev_prop_allow_set_link_before_realize, + 0, NULL); + + memory_region_init_io(&s->gpiomem, OBJECT(s), &macio_gpio_ops, obj, + "gpio", 0x30); + sysbus_init_mmio(sbd, &s->gpiomem); +} + +static const VMStateDescription vmstate_macio_gpio = { + .name = "macio_gpio", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(gpio_levels, MacIOGPIOState, 8), + VMSTATE_UINT8_ARRAY(gpio_regs, MacIOGPIOState, 36), + VMSTATE_END_OF_LIST() + } +}; + +static void macio_gpio_reset(DeviceState *dev) +{ + MacIOGPIOState *s = MACIO_GPIO(dev); + + /* GPIO 1 is up by default */ + macio_set_gpio(s, 1, true); +} + +static void macio_gpio_nmi(NMIState *n, int cpu_index, Error **errp) +{ + macio_set_gpio(MACIO_GPIO(n), 9, true); + macio_set_gpio(MACIO_GPIO(n), 9, false); +} + +static void macio_gpio_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + NMIClass *nc = NMI_CLASS(oc); + + dc->realize = macio_gpio_realize; + dc->reset = macio_gpio_reset; + dc->vmsd = &vmstate_macio_gpio; + nc->nmi_monitor_handler = macio_gpio_nmi; +} + +static const TypeInfo macio_gpio_init_info = { + .name = TYPE_MACIO_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MacIOGPIOState), + .instance_init = macio_gpio_init, + .class_init = macio_gpio_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_NMI }, + { } + }, +}; + +static void macio_gpio_register_types(void) +{ + type_register_static(&macio_gpio_init_info); +} + +type_init(macio_gpio_register_types) diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index f9a40ee..d135e3b 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -105,17 +105,6 @@ static void macio_common_realize(PCIDevice *d, Error **errp) memory_region_add_subregion(&s->bar, 0x08000, sysbus_mmio_get_region(sysbus_dev, 0)); - qdev_prop_set_uint64(DEVICE(&s->cuda), "timebase-frequency", - s->frequency); - object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_dev = SYS_BUS_DEVICE(&s->cuda); - memory_region_add_subregion(&s->bar, 0x16000, - sysbus_mmio_get_region(sysbus_dev, 0)); - qdev_prop_set_uint32(DEVICE(&s->escc), "disabled", 0); qdev_prop_set_uint32(DEVICE(&s->escc), "frequency", ESCC_CLOCK); qdev_prop_set_uint32(DEVICE(&s->escc), "it_shift", 4); @@ -163,7 +152,16 @@ static void macio_oldworld_realize(PCIDevice *d, Error **errp) return; } + qdev_prop_set_uint64(DEVICE(&s->cuda), "timebase-frequency", + s->frequency); + object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } sysbus_dev = SYS_BUS_DEVICE(&s->cuda); + memory_region_add_subregion(&s->bar, 0x16000, + sysbus_mmio_get_region(sysbus_dev, 0)); sysbus_connect_irq(sysbus_dev, 0, qdev_get_gpio_in(pic_dev, OLDWORLD_CUDA_IRQ)); @@ -234,6 +232,10 @@ static void macio_oldworld_init(Object *obj) qdev_prop_allow_set_link_before_realize, 0, NULL); + object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA); + qdev_set_parent_bus(DEVICE(&s->cuda), sysbus_get_default()); + object_property_add_child(obj, "cuda", OBJECT(&s->cuda), NULL); + object_initialize(&os->nvram, sizeof(os->nvram), TYPE_MACIO_NVRAM); dev = DEVICE(&os->nvram); qdev_prop_set_uint32(dev, "size", 0x2000); @@ -293,10 +295,6 @@ static void macio_newworld_realize(PCIDevice *d, Error **errp) return; } - sysbus_dev = SYS_BUS_DEVICE(&s->cuda); - sysbus_connect_irq(sysbus_dev, 0, qdev_get_gpio_in(pic_dev, - NEWWORLD_CUDA_IRQ)); - sysbus_dev = SYS_BUS_DEVICE(&s->escc); sysbus_connect_irq(sysbus_dev, 0, qdev_get_gpio_in(pic_dev, NEWWORLD_ESCCB_IRQ)); @@ -332,6 +330,53 @@ static void macio_newworld_realize(PCIDevice *d, Error **errp) memory_region_init_io(timer_memory, OBJECT(s), &timer_ops, NULL, "timer", 0x1000); memory_region_add_subregion(&s->bar, 0x15000, timer_memory); + + if (ns->has_pmu) { + /* GPIOs */ + sysbus_dev = SYS_BUS_DEVICE(&ns->gpio); + object_property_set_link(OBJECT(&ns->gpio), OBJECT(pic_dev), "pic", + &error_abort); + memory_region_add_subregion(&s->bar, 0x50, + sysbus_mmio_get_region(sysbus_dev, 0)); + object_property_set_bool(OBJECT(&ns->gpio), true, "realized", &err); + + /* PMU */ + object_initialize(&s->pmu, sizeof(s->pmu), TYPE_VIA_PMU); + object_property_set_link(OBJECT(&s->pmu), OBJECT(sysbus_dev), "gpio", + &error_abort); + qdev_prop_set_bit(DEVICE(&s->pmu), "has-adb", ns->has_adb); + qdev_set_parent_bus(DEVICE(&s->pmu), sysbus_get_default()); + object_property_add_child(OBJECT(s), "pmu", OBJECT(&s->pmu), NULL); + + object_property_set_bool(OBJECT(&s->pmu), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_dev = SYS_BUS_DEVICE(&s->pmu); + sysbus_connect_irq(sysbus_dev, 0, qdev_get_gpio_in(pic_dev, + NEWWORLD_PMU_IRQ)); + memory_region_add_subregion(&s->bar, 0x16000, + sysbus_mmio_get_region(sysbus_dev, 0)); + } else { + /* CUDA */ + object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA); + qdev_set_parent_bus(DEVICE(&s->cuda), sysbus_get_default()); + object_property_add_child(OBJECT(s), "cuda", OBJECT(&s->cuda), NULL); + qdev_prop_set_uint64(DEVICE(&s->cuda), "timebase-frequency", + s->frequency); + + object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_dev = SYS_BUS_DEVICE(&s->cuda); + sysbus_connect_irq(sysbus_dev, 0, qdev_get_gpio_in(pic_dev, + NEWWORLD_CUDA_IRQ)); + memory_region_add_subregion(&s->bar, 0x16000, + sysbus_mmio_get_region(sysbus_dev, 0)); + } } static void macio_newworld_init(Object *obj) @@ -345,6 +390,9 @@ static void macio_newworld_init(Object *obj) qdev_prop_allow_set_link_before_realize, 0, NULL); + object_initialize(&ns->gpio, sizeof(ns->gpio), TYPE_MACIO_GPIO); + qdev_set_parent_bus(DEVICE(&ns->gpio), sysbus_get_default()); + for (i = 0; i < 2; i++) { macio_init_ide(s, &ns->ide[i], sizeof(ns->ide[i]), i); } @@ -356,10 +404,6 @@ static void macio_instance_init(Object *obj) memory_region_init(&s->bar, obj, "macio", 0x80000); - object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA); - qdev_set_parent_bus(DEVICE(&s->cuda), sysbus_get_default()); - object_property_add_child(obj, "cuda", OBJECT(&s->cuda), NULL); - object_initialize(&s->dbdma, sizeof(s->dbdma), TYPE_MAC_DBDMA); qdev_set_parent_bus(DEVICE(&s->dbdma), sysbus_get_default()); object_property_add_child(obj, "dbdma", OBJECT(&s->dbdma), NULL); @@ -399,6 +443,12 @@ static const VMStateDescription vmstate_macio_newworld = { } }; +static Property macio_newworld_properties[] = { + DEFINE_PROP_BOOL("has-pmu", NewWorldMacIOState, has_pmu, false), + DEFINE_PROP_BOOL("has-adb", NewWorldMacIOState, has_adb, false), + DEFINE_PROP_END_OF_LIST() +}; + static void macio_newworld_class_init(ObjectClass *oc, void *data) { PCIDeviceClass *pdc = PCI_DEVICE_CLASS(oc); @@ -407,6 +457,7 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data) pdc->realize = macio_newworld_realize; pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL; dc->vmsd = &vmstate_macio_newworld; + dc->props = macio_newworld_properties; } static Property macio_properties[] = { diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c new file mode 100644 index 0000000..e246b0f --- /dev/null +++ b/hw/misc/macio/pmu.c @@ -0,0 +1,871 @@ +/* + * QEMU PowerMac PMU device support + * + * Copyright (c) 2016 Benjamin Herrenschmidt, IBM Corp. + * Copyright (c) 2018 Mark Cave-Ayland + * + * Based on the CUDA device by: + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/ppc/mac.h" +#include "hw/input/adb.h" +#include "hw/misc/mos6522.h" +#include "hw/misc/macio/gpio.h" +#include "hw/misc/macio/pmu.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "trace.h" + + +/* Bits in B data register: all active low */ +#define TACK 0x08 /* Transfer request (input) */ +#define TREQ 0x10 /* Transfer acknowledge (output) */ + +/* PMU returns time_t's offset from Jan 1, 1904, not 1970 */ +#define RTC_OFFSET 2082844800 + +#define VIA_TIMER_FREQ (4700000 / 6) + +static void via_update_irq(PMUState *s) +{ + MOS6522PMUState *mps = MOS6522_PMU(&s->mos6522_pmu); + MOS6522State *ms = MOS6522(mps); + + bool new_state = !!(ms->ifr & ms->ier & (SR_INT | T1_INT | T2_INT)); + + if (new_state != s->via_irq_state) { + s->via_irq_state = new_state; + qemu_set_irq(s->via_irq, new_state); + } +} + +static void via_set_sr_int(void *opaque) +{ + PMUState *s = opaque; + MOS6522PMUState *mps = MOS6522_PMU(&s->mos6522_pmu); + MOS6522State *ms = MOS6522(mps); + MOS6522DeviceClass *mdc = MOS6522_DEVICE_GET_CLASS(ms); + + mdc->set_sr_int(ms); +} + +static void pmu_update_extirq(PMUState *s) +{ + if ((s->intbits & s->intmask) != 0) { + macio_set_gpio(s->gpio, 1, false); + } else { + macio_set_gpio(s->gpio, 1, true); + } +} + +static void pmu_adb_poll(void *opaque) +{ + PMUState *s = opaque; + int olen; + + if (!(s->intbits & PMU_INT_ADB)) { + olen = adb_poll(&s->adb_bus, s->adb_reply, s->adb_poll_mask); + trace_pmu_adb_poll(olen); + + if (olen > 0) { + s->adb_reply_size = olen; + s->intbits |= PMU_INT_ADB | PMU_INT_ADB_AUTO; + pmu_update_extirq(s); + } + } + + timer_mod(s->adb_poll_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 30); +} + +static void pmu_one_sec_timer(void *opaque) +{ + PMUState *s = opaque; + + trace_pmu_one_sec_timer(); + + s->intbits |= PMU_INT_TICK; + pmu_update_extirq(s); + s->one_sec_target += 1000; + + timer_mod(s->one_sec_timer, s->one_sec_target); +} + +static void pmu_cmd_int_ack(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: INT_ACK command, invalid len: %d want: 0\n", + in_len); + return; + } + + /* Make appropriate reply packet */ + if (s->intbits & PMU_INT_ADB) { + if (!s->adb_reply_size) { + qemu_log_mask(LOG_GUEST_ERROR, + "Odd, PMU_INT_ADB set with no reply in buffer\n"); + } + + memcpy(out_data + 1, s->adb_reply, s->adb_reply_size); + out_data[0] = s->intbits & (PMU_INT_ADB | PMU_INT_ADB_AUTO); + *out_len = s->adb_reply_size + 1; + s->intbits &= ~(PMU_INT_ADB | PMU_INT_ADB_AUTO); + s->adb_reply_size = 0; + } else { + out_data[0] = s->intbits; + s->intbits = 0; + *out_len = 1; + } + + pmu_update_extirq(s); +} + +static void pmu_cmd_set_int_mask(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: SET_INT_MASK command, invalid len: %d want: 1\n", + in_len); + return; + } + + trace_pmu_cmd_set_int_mask(s->intmask); + s->intmask = in_data[0]; + + pmu_update_extirq(s); +} + +static void pmu_cmd_set_adb_autopoll(PMUState *s, uint16_t mask) +{ + trace_pmu_cmd_set_adb_autopoll(mask); + + if (s->autopoll_mask == mask) { + return; + } + + s->autopoll_mask = mask; + if (mask) { + timer_mod(s->adb_poll_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 30); + } else { + timer_del(s->adb_poll_timer); + } +} + +static void pmu_cmd_adb(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + int len, adblen; + uint8_t adb_cmd[255]; + + if (in_len < 2) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: ADB PACKET, invalid len: %d want at least 2\n", + in_len); + return; + } + + *out_len = 0; + + if (!s->has_adb) { + trace_pmu_cmd_adb_nobus(); + return; + } + + /* Set autopoll is a special form of the command */ + if (in_data[0] == 0 && in_data[1] == 0x86) { + uint16_t mask = in_data[2]; + mask = (mask << 8) | in_data[3]; + if (in_len != 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: ADB Autopoll requires 4 bytes, got %d\n", + in_len); + return; + } + + pmu_cmd_set_adb_autopoll(s, mask); + return; + } + + trace_pmu_cmd_adb_request(in_len, in_data[0], in_data[1], in_data[2], + in_data[3], in_data[4]); + + *out_len = 0; + + /* Check ADB len */ + adblen = in_data[2]; + if (adblen > (in_len - 3)) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: ADB len is %d > %d (in_len -3)...erroring\n", + adblen, in_len - 3); + len = -1; + } else if (adblen > 252) { + qemu_log_mask(LOG_GUEST_ERROR, "PMU: ADB command too big!\n"); + len = -1; + } else { + /* Format command */ + adb_cmd[0] = in_data[0]; + memcpy(&adb_cmd[1], &in_data[3], in_len - 3); + len = adb_request(&s->adb_bus, s->adb_reply + 2, adb_cmd, in_len - 2); + + trace_pmu_cmd_adb_reply(len); + } + + if (len > 0) { + /* XXX Check this */ + s->adb_reply_size = len + 2; + s->adb_reply[0] = 0x01; + s->adb_reply[1] = len; + } else { + /* XXX Check this */ + s->adb_reply_size = 1; + s->adb_reply[0] = 0x00; + } + + s->intbits |= PMU_INT_ADB; + pmu_update_extirq(s); +} + +static void pmu_cmd_adb_poll_off(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: ADB POLL OFF command, invalid len: %d want: 0\n", + in_len); + return; + } + + if (s->has_adb && s->autopoll_mask) { + timer_del(s->adb_poll_timer); + s->autopoll_mask = false; + } +} + +static void pmu_cmd_shutdown(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len != 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: SHUTDOWN command, invalid len: %d want: 4\n", + in_len); + return; + } + + *out_len = 1; + out_data[0] = 0; + + if (in_data[0] != 'M' || in_data[1] != 'A' || in_data[2] != 'T' || + in_data[3] != 'T') { + + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: SHUTDOWN command, Bad MATT signature\n"); + return; + } + + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); +} + +static void pmu_cmd_reset(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: RESET command, invalid len: %d want: 0\n", + in_len); + return; + } + + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); +} + +static void pmu_cmd_get_rtc(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + uint32_t ti; + + if (in_len != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: GET_RTC command, invalid len: %d want: 0\n", + in_len); + return; + } + + ti = s->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + / NANOSECONDS_PER_SECOND); + out_data[0] = ti >> 24; + out_data[1] = ti >> 16; + out_data[2] = ti >> 8; + out_data[3] = ti; + *out_len = 4; +} + +static void pmu_cmd_set_rtc(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + uint32_t ti; + + if (in_len != 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: SET_RTC command, invalid len: %d want: 4\n", + in_len); + return; + } + + ti = (((uint32_t)in_data[0]) << 24) + (((uint32_t)in_data[1]) << 16) + + (((uint32_t)in_data[2]) << 8) + in_data[3]; + + s->tick_offset = ti - (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + / NANOSECONDS_PER_SECOND); +} + +static void pmu_cmd_system_ready(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + /* Do nothing */ +} + +static void pmu_cmd_get_version(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + *out_len = 1; + *out_data = 1; /* ??? Check what Apple does */ +} + +static void pmu_cmd_power_events(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len < 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: POWER EVENTS command, invalid len %d, want at least 1\n", + in_len); + return; + } + + switch (in_data[0]) { + /* Dummies for now */ + case PMU_PWR_GET_POWERUP_EVENTS: + *out_len = 2; + out_data[0] = 0; + out_data[1] = 0; + break; + case PMU_PWR_SET_POWERUP_EVENTS: + case PMU_PWR_CLR_POWERUP_EVENTS: + break; + case PMU_PWR_GET_WAKEUP_EVENTS: + *out_len = 2; + out_data[0] = 0; + out_data[1] = 0; + break; + case PMU_PWR_SET_WAKEUP_EVENTS: + case PMU_PWR_CLR_WAKEUP_EVENTS: + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: POWER EVENTS unknown subcommand 0x%02x\n", + in_data[0]); + } +} + +static void pmu_cmd_get_cover(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + /* Not 100% sure here, will have to check what a real Mac + * returns other than byte 0 bit 0 is LID closed on laptops + */ + *out_len = 1; + *out_data = 0x00; +} + +static void pmu_cmd_download_status(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + /* This has to do with PMU firmware updates as far as I can tell. + * + * We return 0x62 which is what OpenPMU expects + */ + *out_len = 1; + *out_data = 0x62; +} + +static void pmu_cmd_read_pmu_ram(PMUState *s, + const uint8_t *in_data, uint8_t in_len, + uint8_t *out_data, uint8_t *out_len) +{ + if (in_len < 3) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: READ_PMU_RAM command, invalid len %d, expected 3\n", + in_len); + return; + } + + qemu_log_mask(LOG_GUEST_ERROR, + "PMU: Unsupported READ_PMU_RAM, args: %02x %02x %02x\n", + in_data[0], in_data[1], in_data[2]); + + *out_len = 0; +} + +/* description of commands */ +typedef struct PMUCmdHandler { + uint8_t command; + const char *name; + void (*handler)(PMUState *s, + const uint8_t *in_args, uint8_t in_len, + uint8_t *out_args, uint8_t *out_len); +} PMUCmdHandler; + +static const PMUCmdHandler PMUCmdHandlers[] = { + { PMU_INT_ACK, "INT ACK", pmu_cmd_int_ack }, + { PMU_SET_INTR_MASK, "SET INT MASK", pmu_cmd_set_int_mask }, + { PMU_ADB_CMD, "ADB COMMAND", pmu_cmd_adb }, + { PMU_ADB_POLL_OFF, "ADB POLL OFF", pmu_cmd_adb_poll_off }, + { PMU_RESET, "REBOOT", pmu_cmd_reset }, + { PMU_SHUTDOWN, "SHUTDOWN", pmu_cmd_shutdown }, + { PMU_READ_RTC, "GET RTC", pmu_cmd_get_rtc }, + { PMU_SET_RTC, "SET RTC", pmu_cmd_set_rtc }, + { PMU_SYSTEM_READY, "SYSTEM READY", pmu_cmd_system_ready }, + { PMU_GET_VERSION, "GET VERSION", pmu_cmd_get_version }, + { PMU_POWER_EVENTS, "POWER EVENTS", pmu_cmd_power_events }, + { PMU_GET_COVER, "GET_COVER", pmu_cmd_get_cover }, + { PMU_DOWNLOAD_STATUS, "DOWNLOAD STATUS", pmu_cmd_download_status }, + { PMU_READ_PMU_RAM, "READ PMGR RAM", pmu_cmd_read_pmu_ram }, +}; + +static void pmu_dispatch_cmd(PMUState *s) +{ + unsigned int i; + + /* No response by default */ + s->cmd_rsp_sz = 0; + + for (i = 0; i < ARRAY_SIZE(PMUCmdHandlers); i++) { + const PMUCmdHandler *desc = &PMUCmdHandlers[i]; + + if (desc->command != s->cmd) { + continue; + } + + trace_pmu_dispatch_cmd(desc->name); + desc->handler(s, s->cmd_buf, s->cmd_buf_pos, + s->cmd_rsp, &s->cmd_rsp_sz); + + if (s->rsplen != -1 && s->rsplen != s->cmd_rsp_sz) { + trace_pmu_debug_protocol_string("QEMU internal cmd resp mismatch!"); + } else { + trace_pmu_debug_protocol_resp_size(s->cmd_rsp_sz); + } + + return; + } + + trace_pmu_dispatch_unknown_cmd(s->cmd); + + /* Manufacture fake response with 0's */ + if (s->rsplen == -1) { + s->cmd_rsp_sz = 0; + } else { + s->cmd_rsp_sz = s->rsplen; + memset(s->cmd_rsp, 0, s->rsplen); + } +} + +static void pmu_update(PMUState *s) +{ + MOS6522PMUState *mps = &s->mos6522_pmu; + MOS6522State *ms = MOS6522(mps); + + /* Only react to changes in reg B */ + if (ms->b == s->last_b) { + return; + } + s->last_b = ms->b; + + /* Check the TREQ / TACK state */ + switch (ms->b & (TREQ | TACK)) { + case TREQ: + /* This is an ack release, handle it and bail out */ + ms->b |= TACK; + s->last_b = ms->b; + + trace_pmu_debug_protocol_string("handshake: TREQ high, setting TACK"); + return; + case TACK: + /* This is a valid request, handle below */ + break; + case TREQ | TACK: + /* This is an idle state */ + return; + default: + /* Invalid state, log and ignore */ + trace_pmu_debug_protocol_error(ms->b); + return; + } + + /* If we wanted to handle commands asynchronously, this is where + * we would delay the clearing of TACK until we are ready to send + * the response + */ + + /* We have a request, handshake TACK so we don't stay in + * an invalid state. If we were concurrent with the OS we + * should only do this after we grabbed the SR but that isn't + * a problem here. + */ + + trace_pmu_debug_protocol_clear_treq(s->cmd_state); + + ms->b &= ~TACK; + s->last_b = ms->b; + + /* Act according to state */ + switch (s->cmd_state) { + case pmu_state_idle: + if (!(ms->acr & SR_OUT)) { + trace_pmu_debug_protocol_string("protocol error! " + "state idle, ACR reading"); + break; + } + + s->cmd = ms->sr; + via_set_sr_int(s); + s->cmdlen = pmu_data_len[s->cmd][0]; + s->rsplen = pmu_data_len[s->cmd][1]; + s->cmd_buf_pos = 0; + s->cmd_rsp_pos = 0; + s->cmd_state = pmu_state_cmd; + + trace_pmu_debug_protocol_cmd(s->cmd, s->cmdlen, s->rsplen); + break; + + case pmu_state_cmd: + if (!(ms->acr & SR_OUT)) { + trace_pmu_debug_protocol_string("protocol error! " + "state cmd, ACR reading"); + break; + } + + if (s->cmdlen == -1) { + trace_pmu_debug_protocol_cmdlen(ms->sr); + + s->cmdlen = ms->sr; + if (s->cmdlen > sizeof(s->cmd_buf)) { + trace_pmu_debug_protocol_cmd_toobig(s->cmdlen); + } + } else if (s->cmd_buf_pos < sizeof(s->cmd_buf)) { + s->cmd_buf[s->cmd_buf_pos++] = ms->sr; + } + + via_set_sr_int(s); + break; + + case pmu_state_rsp: + if (ms->acr & SR_OUT) { + trace_pmu_debug_protocol_string("protocol error! " + "state resp, ACR writing"); + break; + } + + if (s->rsplen == -1) { + trace_pmu_debug_protocol_cmd_send_resp_size(s->cmd_rsp_sz); + + ms->sr = s->cmd_rsp_sz; + s->rsplen = s->cmd_rsp_sz; + } else if (s->cmd_rsp_pos < s->cmd_rsp_sz) { + trace_pmu_debug_protocol_cmd_send_resp(s->cmd_rsp_pos, s->rsplen); + + ms->sr = s->cmd_rsp[s->cmd_rsp_pos++]; + } + + via_set_sr_int(s); + break; + } + + /* Check for state completion */ + if (s->cmd_state == pmu_state_cmd && s->cmdlen == s->cmd_buf_pos) { + trace_pmu_debug_protocol_string("Command reception complete, " + "dispatching..."); + + pmu_dispatch_cmd(s); + s->cmd_state = pmu_state_rsp; + } + + if (s->cmd_state == pmu_state_rsp && s->rsplen == s->cmd_rsp_pos) { + trace_pmu_debug_protocol_cmd_resp_complete(ms->ier); + + s->cmd_state = pmu_state_idle; + } +} + +static uint64_t mos6522_pmu_read(void *opaque, hwaddr addr, unsigned size) +{ + PMUState *s = opaque; + MOS6522PMUState *mps = &s->mos6522_pmu; + MOS6522State *ms = MOS6522(mps); + + addr = (addr >> 9) & 0xf; + return mos6522_read(ms, addr, size); +} + +static void mos6522_pmu_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + PMUState *s = opaque; + MOS6522PMUState *mps = &s->mos6522_pmu; + MOS6522State *ms = MOS6522(mps); + + addr = (addr >> 9) & 0xf; + mos6522_write(ms, addr, val, size); +} + +static const MemoryRegionOps mos6522_pmu_ops = { + .read = mos6522_pmu_read, + .write = mos6522_pmu_write, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static bool pmu_adb_state_needed(void *opaque) +{ + PMUState *s = opaque; + + return s->has_adb; +} + +static const VMStateDescription vmstate_pmu_adb = { + .name = "pmu/adb", + .version_id = 0, + .minimum_version_id = 0, + .needed = pmu_adb_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT16(adb_poll_mask, PMUState), + VMSTATE_TIMER_PTR(adb_poll_timer, PMUState), + VMSTATE_UINT8(adb_reply_size, PMUState), + VMSTATE_BUFFER(adb_reply, PMUState), + } +}; + +static const VMStateDescription vmstate_pmu = { + .name = "pmu", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(mos6522_pmu.parent_obj, PMUState, 0, vmstate_mos6522, + MOS6522State), + VMSTATE_UINT8(last_b, PMUState), + VMSTATE_UINT8(cmd, PMUState), + VMSTATE_UINT32(cmdlen, PMUState), + VMSTATE_UINT32(rsplen, PMUState), + VMSTATE_UINT8(cmd_buf_pos, PMUState), + VMSTATE_BUFFER(cmd_buf, PMUState), + VMSTATE_UINT8(cmd_rsp_pos, PMUState), + VMSTATE_UINT8(cmd_rsp_sz, PMUState), + VMSTATE_BUFFER(cmd_rsp, PMUState), + VMSTATE_UINT8(intbits, PMUState), + VMSTATE_UINT8(intmask, PMUState), + VMSTATE_UINT8(autopoll_rate_ms, PMUState), + VMSTATE_UINT8(autopoll_mask, PMUState), + VMSTATE_UINT32(tick_offset, PMUState), + VMSTATE_TIMER_PTR(one_sec_timer, PMUState), + VMSTATE_INT64(one_sec_target, PMUState), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_pmu_adb, + } +}; + +static void pmu_reset(DeviceState *dev) +{ + PMUState *s = VIA_PMU(dev); + + /* OpenBIOS needs to do this? MacOS 9 needs it */ + s->intmask = PMU_INT_ADB | PMU_INT_TICK; + s->intbits = 0; + + s->cmd_state = pmu_state_idle; + s->autopoll_mask = 0; +} + +static void pmu_realize(DeviceState *dev, Error **errp) +{ + PMUState *s = VIA_PMU(dev); + SysBusDevice *sbd; + MOS6522State *ms; + DeviceState *d; + struct tm tm; + + /* Pass IRQ from 6522 */ + d = DEVICE(&s->mos6522_pmu); + ms = MOS6522(d); + sbd = SYS_BUS_DEVICE(s); + sysbus_pass_irq(sbd, SYS_BUS_DEVICE(ms)); + + qemu_get_timedate(&tm, 0); + s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; + s->one_sec_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, pmu_one_sec_timer, s); + s->one_sec_target = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000; + timer_mod(s->one_sec_timer, s->one_sec_target); + + if (s->has_adb) { + qbus_create_inplace(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, + DEVICE(dev), "adb.0"); + s->adb_poll_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, pmu_adb_poll, s); + s->adb_poll_mask = 0xffff; + s->autopoll_rate_ms = 20; + } +} + +static void pmu_init(Object *obj) +{ + SysBusDevice *d = SYS_BUS_DEVICE(obj); + PMUState *s = VIA_PMU(obj); + + object_property_add_link(obj, "gpio", TYPE_MACIO_GPIO, + (Object **) &s->gpio, + qdev_prop_allow_set_link_before_realize, + 0, NULL); + + object_initialize(&s->mos6522_pmu, sizeof(s->mos6522_pmu), + TYPE_MOS6522_PMU); + qdev_set_parent_bus(DEVICE(&s->mos6522_pmu), sysbus_get_default()); + + memory_region_init_io(&s->mem, obj, &mos6522_pmu_ops, s, "via-pmu", + 0x2000); + sysbus_init_mmio(d, &s->mem); +} + +static Property pmu_properties[] = { + DEFINE_PROP_BOOL("has-adb", PMUState, has_adb, true), + DEFINE_PROP_END_OF_LIST() +}; + +static void pmu_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = pmu_realize; + dc->reset = pmu_reset; + dc->vmsd = &vmstate_pmu; + dc->props = pmu_properties; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); +} + +static const TypeInfo pmu_type_info = { + .name = TYPE_VIA_PMU, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PMUState), + .instance_init = pmu_init, + .class_init = pmu_class_init, +}; + +static void mos6522_pmu_portB_write(MOS6522State *s) +{ + MOS6522PMUState *mps = container_of(s, MOS6522PMUState, parent_obj); + PMUState *ps = container_of(mps, PMUState, mos6522_pmu); + + if ((s->pcr & 0xe0) == 0x20 || (s->pcr & 0xe0) == 0x60) { + s->ifr &= ~CB2_INT; + } + s->ifr &= ~CB1_INT; + + via_update_irq(ps); + pmu_update(ps); +} + +static void mos6522_pmu_portA_write(MOS6522State *s) +{ + MOS6522PMUState *mps = container_of(s, MOS6522PMUState, parent_obj); + PMUState *ps = container_of(mps, PMUState, mos6522_pmu); + + if ((s->pcr & 0x0e) == 0x02 || (s->pcr & 0x0e) == 0x06) { + s->ifr &= ~CA2_INT; + } + s->ifr &= ~CA1_INT; + + via_update_irq(ps); +} + +static void mos6522_pmu_reset(DeviceState *dev) +{ + MOS6522State *ms = MOS6522(dev); + MOS6522PMUState *mps = container_of(ms, MOS6522PMUState, parent_obj); + PMUState *s = container_of(mps, PMUState, mos6522_pmu); + MOS6522DeviceClass *mdc = MOS6522_DEVICE_GET_CLASS(ms); + + mdc->parent_reset(dev); + + ms->timers[0].frequency = VIA_TIMER_FREQ; + ms->timers[1].frequency = (SCALE_US * 6000) / 4700; + + s->last_b = ms->b = TACK | TREQ; +} + +static void mos6522_pmu_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MOS6522DeviceClass *mdc = MOS6522_DEVICE_CLASS(oc); + + dc->reset = mos6522_pmu_reset; + mdc->portB_write = mos6522_pmu_portB_write; + mdc->portA_write = mos6522_pmu_portA_write; +} + +static const TypeInfo mos6522_pmu_type_info = { + .name = TYPE_MOS6522_PMU, + .parent = TYPE_MOS6522, + .instance_size = sizeof(MOS6522PMUState), + .class_init = mos6522_pmu_class_init, +}; + +static void pmu_register_types(void) +{ + type_register_static(&pmu_type_info); + type_register_static(&mos6522_pmu_type_info); +} + +type_init(pmu_register_types) diff --git a/hw/misc/macio/trace-events b/hw/misc/macio/trace-events index d499d78..0501926 100644 --- a/hw/misc/macio/trace-events +++ b/hw/misc/macio/trace-events @@ -13,3 +13,31 @@ cuda_packet_send_data(int i, const uint8_t data) "[%d] 0x%02x" # hw/misc/macio/macio.c macio_timer_write(uint64_t addr, unsigned len, uint64_t val) "write addr 0x%"PRIx64 " len %d val 0x%"PRIx64 macio_timer_read(uint64_t addr, unsigned len, uint32_t val) "read addr 0x%"PRIx64 " len %d val 0x%"PRIx32 + +# hw/misc/macio/gpio.c +macio_set_gpio(int gpio, bool state) "setting GPIO %d to %d" +macio_gpio_irq_assert(int gpio) "asserting GPIO %d" +macio_gpio_irq_deassert(int gpio) "deasserting GPIO %d" +macio_gpio_write(uint64_t addr, uint64_t val) "addr: 0x%"PRIx64" value: 0x%"PRIx64 +macio_gpio_read(uint64_t addr, uint64_t val) "addr: 0x%"PRIx64" value: 0x%"PRIx64 + +# hw/misc/macio/pmu.c +pmu_adb_poll(int olen) "ADB autopoll, olen=%d" +pmu_one_sec_timer(void) "PMU one sec..." +pmu_cmd_set_int_mask(int intmask) "Setting PMU int mask to 0x%02x" +pmu_cmd_set_adb_autopoll(int mask) "ADB set autopoll, mask=0x%04x" +pmu_cmd_adb_nobus(void) "ADB PACKET with no ADB bus!" +pmu_cmd_adb_request(int inlen, int indata0, int indata1, int indata2, int indata3, int indata4) "ADB request: len=%d, cmd=0x%02x, pflags=0x%02x, adblen=%d: 0x%02x 0x%02x..." +pmu_cmd_adb_reply(int len) "ADB reply is %d bytes" +pmu_dispatch_cmd(const char *name) "handling command %s" +pmu_dispatch_unknown_cmd(int cmd) "Unknown PMU command 0x%02x" +pmu_debug_protocol_string(const char *str) "%s" +pmu_debug_protocol_resp_size(int size) "sending %d resp bytes" +pmu_debug_protocol_error(int portB) "protocol error! portB=0x%02x" +pmu_debug_protocol_clear_treq(int state) "TREQ cleared, clearing TACK, state: %d" +pmu_debug_protocol_cmd(int cmd, int cmdlen, int rsplen) "Got command byte 0x%02x, clen=%d, rlen=%d" +pmu_debug_protocol_cmdlen(int len) "got cmd length byte: %d" +pmu_debug_protocol_cmd_toobig(int len) "command too big (%d bytes)" +pmu_debug_protocol_cmd_send_resp_size(int len) "sending length byte: %d" +pmu_debug_protocol_cmd_send_resp(int pos, int len) "sending byte: %d/%d" +pmu_debug_protocol_cmd_resp_complete(int ier) "Response send complete. IER=0x%02x" diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c index 44eb306..14cff26 100644 --- a/hw/misc/mos6522.c +++ b/hw/misc/mos6522.c @@ -40,7 +40,7 @@ static void mos6522_timer_update(MOS6522State *s, MOS6522Timer *ti, static void mos6522_update_irq(MOS6522State *s) { - if (s->ifr & s->ier & (SR_INT | T1_INT | T2_INT)) { + if (s->ifr & s->ier) { qemu_irq_raise(s->irq); } else { qemu_irq_lower(s->irq); @@ -241,7 +241,7 @@ uint64_t mos6522_read(void *opaque, hwaddr addr, unsigned size) break; case VIA_REG_SR: val = s->sr; - s->ifr &= ~(SR_INT | CB1_INT | CB2_INT); + s->ifr &= ~SR_INT; mos6522_update_irq(s); break; case VIA_REG_ACR: @@ -463,6 +463,7 @@ static void mos6522_class_init(ObjectClass *oc, void *data) mdc->set_sr_int = mos6522_set_sr_int; mdc->portB_write = mos6522_portB_write; mdc->portA_write = mos6522_portA_write; + mdc->update_irq = mos6522_update_irq; mdc->get_timer1_counter_value = mos6522_get_counter_value; mdc->get_timer2_counter_value = mos6522_get_counter_value; mdc->get_timer1_load_time = mos6522_get_load_time; diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index 89fa8bb..c0217e6 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -27,6 +27,7 @@ #define PPC_MAC_H #include "exec/memory.h" +#include "hw/boards.h" #include "hw/sysbus.h" #include "hw/ide/internal.h" #include "hw/input/adb.h" @@ -58,12 +59,31 @@ /* New World IRQs */ #define NEWWORLD_CUDA_IRQ 0x19 +#define NEWWORLD_PMU_IRQ 0x19 #define NEWWORLD_ESCCB_IRQ 0x24 #define NEWWORLD_ESCCA_IRQ 0x25 #define NEWWORLD_IDE0_IRQ 0xd #define NEWWORLD_IDE0_DMA_IRQ 0x2 #define NEWWORLD_IDE1_IRQ 0xe #define NEWWORLD_IDE1_DMA_IRQ 0x3 +#define NEWWORLD_EXTING_GPIO1 0x2f +#define NEWWORLD_EXTING_GPIO9 0x37 + +/* Core99 machine */ +#define TYPE_CORE99_MACHINE MACHINE_TYPE_NAME("mac99") +#define CORE99_MACHINE(obj) OBJECT_CHECK(Core99MachineState, (obj), \ + TYPE_CORE99_MACHINE) + +#define CORE99_VIA_CONFIG_CUDA 0x0 +#define CORE99_VIA_CONFIG_PMU 0x1 +#define CORE99_VIA_CONFIG_PMU_ADB 0x2 + +typedef struct Core99MachineState { + /*< private >*/ + MachineState parent; + + uint8_t via_config; +} Core99MachineState; /* MacIO */ #define TYPE_MACIO_IDE "macio-ide" diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 744acdf..ff715ff 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -111,6 +111,7 @@ static void ppc_core99_init(MachineState *machine) const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; const char *boot_device = machine->boot_order; + Core99MachineState *core99_machine = CORE99_MACHINE(machine); PowerPCCPU *cpu = NULL; CPUPPCState *env = NULL; char *filename; @@ -122,6 +123,7 @@ static void ppc_core99_init(MachineState *machine) UNINHostState *uninorth_pci; PCIBus *pci_bus; NewWorldMacIOState *macio; + bool has_pmu, has_adb; MACIOIDEState *macio_ide; BusState *adb_bus; MacIONVRAMState *nvr; @@ -361,6 +363,9 @@ static void ppc_core99_init(MachineState *machine) } machine->usb |= defaults_enabled() && !machine->usb_disabled; + has_pmu = (core99_machine->via_config != CORE99_VIA_CONFIG_CUDA); + has_adb = (core99_machine->via_config == CORE99_VIA_CONFIG_CUDA || + core99_machine->via_config == CORE99_VIA_CONFIG_PMU_ADB); /* Timebase Frequency */ if (kvm_enabled()) { @@ -376,6 +381,8 @@ static void ppc_core99_init(MachineState *machine) macio = NEWWORLD_MACIO(pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO)); dev = DEVICE(macio); qdev_prop_set_uint64(dev, "frequency", tbfreq); + qdev_prop_set_bit(dev, "has-pmu", has_pmu); + qdev_prop_set_bit(dev, "has-adb", has_adb); object_property_set_link(OBJECT(macio), OBJECT(pic_dev), "pic", &error_abort); qdev_init_nofail(dev); @@ -391,19 +398,29 @@ static void ppc_core99_init(MachineState *machine) "ide[1]")); macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]); - dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); - adb_bus = qdev_get_child_bus(dev, "adb.0"); - dev = qdev_create(adb_bus, TYPE_ADB_KEYBOARD); - qdev_init_nofail(dev); - dev = qdev_create(adb_bus, TYPE_ADB_MOUSE); - qdev_init_nofail(dev); + if (has_adb) { + if (has_pmu) { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pmu")); + } else { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); + } + + adb_bus = qdev_get_child_bus(dev, "adb.0"); + dev = qdev_create(adb_bus, TYPE_ADB_KEYBOARD); + qdev_prop_set_bit(dev, "disable-direct-reg3-writes", has_pmu); + qdev_init_nofail(dev); + + dev = qdev_create(adb_bus, TYPE_ADB_MOUSE); + qdev_prop_set_bit(dev, "disable-direct-reg3-writes", has_pmu); + qdev_init_nofail(dev); + } if (machine->usb) { pci_create_simple(pci_bus, -1, "pci-ohci"); /* U3 needs to use USB for input because Linux doesn't support via-cuda on PPC64 */ - if (machine_arch == ARCH_MAC99_U3) { + if (!has_adb || machine_arch == ARCH_MAC99_U3) { USBBus *usb_bus = usb_bus_find(-1); usb_create_simple(usb_bus, "usb-kbd"); @@ -459,6 +476,8 @@ static void ppc_core99_init(MachineState *machine) fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_VIACONFIG, core99_machine->via_config); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); if (kvm_enabled()) { #ifdef CONFIG_KVM @@ -515,10 +534,61 @@ static void core99_machine_class_init(ObjectClass *oc, void *data) #endif } +static char *core99_get_via_config(Object *obj, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + switch (cms->via_config) { + default: + case CORE99_VIA_CONFIG_CUDA: + return g_strdup("cuda"); + + case CORE99_VIA_CONFIG_PMU: + return g_strdup("pmu"); + + case CORE99_VIA_CONFIG_PMU_ADB: + return g_strdup("pmu-adb"); + } +} + +static void core99_set_via_config(Object *obj, const char *value, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + if (!strcmp(value, "cuda")) { + cms->via_config = CORE99_VIA_CONFIG_CUDA; + } else if (!strcmp(value, "pmu")) { + cms->via_config = CORE99_VIA_CONFIG_PMU; + } else if (!strcmp(value, "pmu-adb")) { + cms->via_config = CORE99_VIA_CONFIG_PMU_ADB; + } else { + error_setg(errp, "Invalid via value"); + error_append_hint(errp, "Valid values are cuda, pmu, pmu-adb.\n"); + } +} + +static void core99_instance_init(Object *obj) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + /* Default via_config is CORE99_VIA_CONFIG_CUDA */ + cms->via_config = CORE99_VIA_CONFIG_CUDA; + object_property_add_str(obj, "via", core99_get_via_config, + core99_set_via_config, NULL); + object_property_set_description(obj, "via", + "Set VIA configuration. " + "Valid values are cuda, pmu and pmu-adb", + NULL); + + return; +} + static const TypeInfo core99_machine_info = { .name = MACHINE_TYPE_NAME("mac99"), .parent = TYPE_MACHINE, .class_init = core99_machine_class_init, + .instance_init = core99_instance_init, + .instance_size = sizeof(Core99MachineState) }; static void mac_machine_register_types(void) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 0314881..0d2b79f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -121,9 +121,9 @@ static int get_cpus_node(void *fdt) */ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) { - CPUState *cs = CPU(DEVICE(pc->threads)); + PowerPCCPU *cpu = pc->threads[0]; + CPUState *cs = CPU(cpu); DeviceClass *dc = DEVICE_GET_CLASS(cs); - PowerPCCPU *cpu = POWERPC_CPU(cs); int smt_threads = CPU_CORE(pc)->nr_threads; CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); @@ -849,9 +849,8 @@ static void pnv_chip_icp_realize(PnvChip *chip, Error **errp) } } -static void pnv_chip_realize(DeviceState *dev, Error **errp) +static void pnv_chip_core_realize(PnvChip *chip, Error **errp) { - PnvChip *chip = PNV_CHIP(dev); Error *error = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); const char *typename = pnv_chip_core_typename(chip); @@ -863,14 +862,6 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) return; } - /* XSCOM bridge */ - pnv_xscom_realize(chip, &error); - if (error) { - error_propagate(errp, error); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); - /* Cores */ pnv_chip_core_sanitize(chip, &error); if (error) { @@ -918,6 +909,27 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) &PNV_CORE(pnv_core)->xscom_regs); i++; } +} + +static void pnv_chip_realize(DeviceState *dev, Error **errp) +{ + PnvChip *chip = PNV_CHIP(dev); + Error *error = NULL; + + /* XSCOM bridge */ + pnv_xscom_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + + /* Cores */ + pnv_chip_core_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } /* Create LPC controller */ object_property_set_bool(OBJECT(&chip->lpc), true, "realized", diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 13ad7d9..f7cf33f 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -54,28 +54,6 @@ static void pnv_cpu_reset(void *opaque) env->msr |= MSR_HVB; /* Hypervisor mode */ } -static void pnv_cpu_init(PowerPCCPU *cpu, Error **errp) -{ - CPUPPCState *env = &cpu->env; - int core_pir; - int thread_index = 0; /* TODO: TCG supports only one thread */ - ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; - - core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort); - - /* - * The PIR of a thread is the core PIR + the thread index. We will - * need to find a way to get the thread index when TCG supports - * more than 1. We could use the object name ? - */ - pir->default_value = core_pir + thread_index; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); - - qemu_register_reset(pnv_cpu_reset, cpu); -} - /* * These values are read by the PowerNV HW monitors under Linux */ @@ -121,29 +99,39 @@ static const MemoryRegionOps pnv_core_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) +static void pnv_realize_vcpu(PowerPCCPU *cpu, XICSFabric *xi, Error **errp) { + CPUPPCState *env = &cpu->env; + int core_pir; + int thread_index = 0; /* TODO: TCG supports only one thread */ + ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; Error *local_err = NULL; - CPUState *cs = CPU(child); - PowerPCCPU *cpu = POWERPC_CPU(cs); - object_property_set_bool(child, true, "realized", &local_err); + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { error_propagate(errp, local_err); return; } - cpu->intc = icp_create(child, TYPE_PNV_ICP, xi, &local_err); + cpu->intc = icp_create(OBJECT(cpu), TYPE_PNV_ICP, xi, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - pnv_cpu_init(cpu, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort); + + /* + * The PIR of a thread is the core PIR + the thread index. We will + * need to find a way to get the thread index when TCG supports + * more than 1. We could use the object name ? + */ + pir->default_value = core_pir + thread_index; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); + + qemu_register_reset(pnv_cpu_reset, cpu); } static void pnv_core_realize(DeviceState *dev, Error **errp) @@ -151,7 +139,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) PnvCore *pc = PNV_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(OBJECT(dev)); const char *typename = pnv_core_cpu_typename(pc); - size_t size = object_type_get_instance_size(typename); Error *local_err = NULL; void *obj; int i, j; @@ -165,26 +152,21 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) return; } - pc->threads = g_malloc0(size * cc->nr_threads); + pc->threads = g_new(PowerPCCPU *, cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { - obj = pc->threads + i * size; + obj = object_new(typename); - object_initialize(obj, size, typename); + pc->threads[i] = POWERPC_CPU(obj); snprintf(name, sizeof(name), "thread[%d]", i); - object_property_add_child(OBJECT(pc), name, obj, &local_err); + object_property_add_child(OBJECT(pc), name, obj, &error_abort); object_property_add_alias(obj, "core-pir", OBJECT(pc), - "pir", &local_err); - if (local_err) { - goto err; - } + "pir", &error_abort); object_unref(obj); } for (j = 0; j < cc->nr_threads; j++) { - obj = pc->threads + j * size; - - pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err); + pnv_realize_vcpu(pc->threads[j], XICS_FABRIC(xi), &local_err); if (local_err) { goto err; } @@ -197,13 +179,33 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) err: while (--i >= 0) { - obj = pc->threads + i * size; + obj = OBJECT(pc->threads[i]); object_unparent(obj); } g_free(pc->threads); error_propagate(errp, local_err); } +static void pnv_unrealize_vcpu(PowerPCCPU *cpu) +{ + qemu_unregister_reset(pnv_cpu_reset, cpu); + object_unparent(cpu->intc); + cpu_remove_sync(CPU(cpu)); + object_unparent(OBJECT(cpu)); +} + +static void pnv_core_unrealize(DeviceState *dev, Error **errp) +{ + PnvCore *pc = PNV_CORE(dev); + CPUCore *cc = CPU_CORE(dev); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + pnv_unrealize_vcpu(pc->threads[i]); + } + g_free(pc->threads); +} + static Property pnv_core_properties[] = { DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), DEFINE_PROP_END_OF_LIST(), @@ -214,6 +216,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = pnv_core_realize; + dc->unrealize = pnv_core_unrealize; dc->props = pnv_core_properties; } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f59999d..db0fb38 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -186,27 +186,33 @@ static int xics_max_server_number(sPAPRMachineState *spapr) static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + Error *local_err = NULL; if (kvm_enabled()) { if (machine_kernel_irqchip_allowed(machine) && - !xics_kvm_init(spapr, errp)) { + !xics_kvm_init(spapr, &local_err)) { spapr->icp_type = TYPE_KVM_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp); + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, + &local_err); } if (machine_kernel_irqchip_required(machine) && !spapr->ics) { - error_prepend(errp, "kernel_irqchip requested but unavailable: "); - return; + error_prepend(&local_err, + "kernel_irqchip requested but unavailable: "); + goto error; } + error_free(local_err); + local_err = NULL; } if (!spapr->ics) { xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); - if (!spapr->ics) { - return; - } + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, + &local_err); } + +error: + error_propagate(errp, local_err); } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 531e145..00e43a9 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -335,14 +335,10 @@ static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr, caps = smc->default_caps; - if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, - 0, spapr->max_compat_pvr)) { - caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; - } - if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, spapr->max_compat_pvr)) { caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; } if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06_PLUS, diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index f3e9b87..aef3be3 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -28,6 +28,7 @@ static void spapr_cpu_reset(void *opaque) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); target_ulong lpcr; cpu_reset(cs); @@ -69,6 +70,12 @@ static void spapr_cpu_reset(void *opaque) /* Set a full AMOR so guest can use the AMR as it sees fit */ env->spr[SPR_AMOR] = 0xffffffffffffffffull; + + spapr_cpu->vpa_addr = 0; + spapr_cpu->slb_shadow_addr = 0; + spapr_cpu->slb_shadow_size = 0; + spapr_cpu->dtl_addr = 0; + spapr_cpu->dtl_size = 0; } void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r3) @@ -83,26 +90,6 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm); } -static void spapr_cpu_destroy(PowerPCCPU *cpu) -{ - qemu_unregister_reset(spapr_cpu_reset, cpu); -} - -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); - - cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - kvmppc_set_papr(cpu); - - qemu_register_reset(spapr_cpu_reset, cpu); - spapr_cpu_reset(cpu); -} - /* * Return the sPAPR CPU core type for @model which essentially is the CPU * model specified with -cpu cmdline option. @@ -122,55 +109,110 @@ const char *spapr_get_cpu_core_type(const char *cpu_type) return object_class_get_name(oc); } -static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp) +static void spapr_unrealize_vcpu(PowerPCCPU *cpu) +{ + qemu_unregister_reset(spapr_cpu_reset, cpu); + object_unparent(cpu->intc); + cpu_remove_sync(CPU(cpu)); + object_unparent(OBJECT(cpu)); +} + +static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); int i; for (i = 0; i < cc->nr_threads; i++) { - Object *obj = OBJECT(sc->threads[i]); - DeviceState *dev = DEVICE(obj); - CPUState *cs = CPU(dev); - PowerPCCPU *cpu = POWERPC_CPU(cs); - - spapr_cpu_destroy(cpu); - object_unparent(cpu->intc); - cpu_remove_sync(cs); - object_unparent(obj); + spapr_unrealize_vcpu(sc->threads[i]); } g_free(sc->threads); } -static void spapr_cpu_core_realize_child(Object *child, - sPAPRMachineState *spapr, Error **errp) +static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, + Error **errp) { + CPUPPCState *env = &cpu->env; Error *local_err = NULL; - CPUState *cs = CPU(child); - PowerPCCPU *cpu = POWERPC_CPU(cs); - object_property_set_bool(child, true, "realized", &local_err); + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { goto error; } - spapr_cpu_init(spapr, cpu, &local_err); - if (local_err) { - goto error; - } + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); - cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr), + qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_reset(cpu); + + cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr), &local_err); if (local_err) { - goto error; + goto error_unregister; } return; +error_unregister: + qemu_unregister_reset(spapr_cpu_reset, cpu); + cpu_remove_sync(CPU(cpu)); error: error_propagate(errp, local_err); } +static PowerPCCPU *spapr_create_vcpu(sPAPRCPUCore *sc, int i, Error **errp) +{ + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(sc); + CPUCore *cc = CPU_CORE(sc); + Object *obj; + char *id; + CPUState *cs; + PowerPCCPU *cpu; + Error *local_err = NULL; + + obj = object_new(scc->cpu_type); + + cs = CPU(obj); + cpu = POWERPC_CPU(obj); + cs->cpu_index = cc->core_id + i; + spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err); + if (local_err) { + goto err; + } + + cpu->node_id = sc->node_id; + + id = g_strdup_printf("thread[%d]", i); + object_property_add_child(OBJECT(sc), id, obj, &local_err); + g_free(id); + if (local_err) { + goto err; + } + + cpu->machine_data = g_new0(sPAPRCPUState, 1); + + object_unref(obj); + return cpu; + +err: + object_unref(obj); + error_propagate(errp, local_err); + return NULL; +} + +static void spapr_delete_vcpu(PowerPCCPU *cpu) +{ + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + + cpu->machine_data = NULL; + g_free(spapr_cpu); + object_unparent(OBJECT(cpu)); +} + static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) { /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user @@ -180,10 +222,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) (sPAPRMachineState *) object_dynamic_cast(qdev_get_machine(), TYPE_SPAPR_MACHINE); sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); - sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); CPUCore *cc = CPU_CORE(OBJECT(dev)); Error *local_err = NULL; - Object *obj; int i, j; if (!spapr) { @@ -193,46 +233,27 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) sc->threads = g_new(PowerPCCPU *, cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { - char id[32]; - CPUState *cs; - PowerPCCPU *cpu; - - obj = object_new(scc->cpu_type); - - cs = CPU(obj); - cpu = sc->threads[i] = POWERPC_CPU(obj); - cs->cpu_index = cc->core_id + i; - spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err); + sc->threads[i] = spapr_create_vcpu(sc, i, &local_err); if (local_err) { goto err; } - - - /* Set NUMA node for the threads belonged to core */ - cpu->node_id = sc->node_id; - - snprintf(id, sizeof(id), "thread[%d]", i); - object_property_add_child(OBJECT(sc), id, obj, &local_err); - if (local_err) { - goto err; - } - object_unref(obj); } for (j = 0; j < cc->nr_threads; j++) { - obj = OBJECT(sc->threads[j]); - - spapr_cpu_core_realize_child(obj, spapr, &local_err); + spapr_realize_vcpu(sc->threads[j], spapr, &local_err); if (local_err) { - goto err; + goto err_unrealize; } } return; +err_unrealize: + while (--j >= 0) { + spapr_unrealize_vcpu(sc->threads[j]); + } err: while (--i >= 0) { - obj = OBJECT(sc->threads[i]); - object_unparent(obj); + spapr_delete_vcpu(sc->threads[i]); } g_free(sc->threads); error_propagate(errp, local_err); @@ -249,7 +270,7 @@ static void spapr_cpu_core_class_init(ObjectClass *oc, void *data) sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc); dc->realize = spapr_cpu_core_realize; - dc->unrealize = spapr_cpu_core_unrealizefn; + dc->unrealize = spapr_cpu_core_unrealize; dc->props = spapr_cpu_core_properties; scc->cpu_type = data; } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 022f6d8..ae913d0 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -8,6 +8,7 @@ #include "exec/exec-all.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" #include "mmu-hash64.h" #include "cpu-models.h" #include "trace.h" @@ -908,9 +909,11 @@ unmap_out: #define VPA_SHARED_PROC_OFFSET 0x9 #define VPA_SHARED_PROC_VAL 0x2 -static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa) +static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa) { - CPUState *cs = CPU(ppc_env_get_cpu(env)); + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); uint16_t size; uint8_t tmp; @@ -935,32 +938,34 @@ static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa) return H_PARAMETER; } - env->vpa_addr = vpa; + spapr_cpu->vpa_addr = vpa; - tmp = ldub_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET); + tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET); tmp |= VPA_SHARED_PROC_VAL; - stb_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp); + stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp); return H_SUCCESS; } -static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa) +static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa) { - if (env->slb_shadow_addr) { + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->slb_shadow_addr) { return H_RESOURCE; } - if (env->dtl_addr) { + if (spapr_cpu->dtl_addr) { return H_RESOURCE; } - env->vpa_addr = 0; + spapr_cpu->vpa_addr = 0; return H_SUCCESS; } -static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr) +static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr) { - CPUState *cs = CPU(ppc_env_get_cpu(env)); + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); uint32_t size; if (addr == 0) { @@ -968,7 +973,7 @@ static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr) return H_HARDWARE; } - size = ldl_be_phys(cs->as, addr + 0x4); + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); if (size < 0x8) { return H_PARAMETER; } @@ -977,26 +982,28 @@ static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr) return H_PARAMETER; } - if (!env->vpa_addr) { + if (!spapr_cpu->vpa_addr) { return H_RESOURCE; } - env->slb_shadow_addr = addr; - env->slb_shadow_size = size; + spapr_cpu->slb_shadow_addr = addr; + spapr_cpu->slb_shadow_size = size; return H_SUCCESS; } -static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong addr) +static target_ulong deregister_slb_shadow(PowerPCCPU *cpu, target_ulong addr) { - env->slb_shadow_addr = 0; - env->slb_shadow_size = 0; + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->slb_shadow_addr = 0; + spapr_cpu->slb_shadow_size = 0; return H_SUCCESS; } -static target_ulong register_dtl(CPUPPCState *env, target_ulong addr) +static target_ulong register_dtl(PowerPCCPU *cpu, target_ulong addr) { - CPUState *cs = CPU(ppc_env_get_cpu(env)); + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); uint32_t size; if (addr == 0) { @@ -1004,26 +1011,28 @@ static target_ulong register_dtl(CPUPPCState *env, target_ulong addr) return H_HARDWARE; } - size = ldl_be_phys(cs->as, addr + 0x4); + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); if (size < 48) { return H_PARAMETER; } - if (!env->vpa_addr) { + if (!spapr_cpu->vpa_addr) { return H_RESOURCE; } - env->dtl_addr = addr; - env->dtl_size = size; + spapr_cpu->dtl_addr = addr; + spapr_cpu->dtl_size = size; return H_SUCCESS; } -static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) +static target_ulong deregister_dtl(PowerPCCPU *cpu, target_ulong addr) { - env->dtl_addr = 0; - env->dtl_size = 0; + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->dtl_addr = 0; + spapr_cpu->dtl_size = 0; return H_SUCCESS; } @@ -1035,38 +1044,36 @@ static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong procno = args[1]; target_ulong vpa = args[2]; target_ulong ret = H_PARAMETER; - CPUPPCState *tenv; PowerPCCPU *tcpu; tcpu = spapr_find_cpu(procno); if (!tcpu) { return H_PARAMETER; } - tenv = &tcpu->env; switch (flags) { case FLAGS_REGISTER_VPA: - ret = register_vpa(tenv, vpa); + ret = register_vpa(tcpu, vpa); break; case FLAGS_DEREGISTER_VPA: - ret = deregister_vpa(tenv, vpa); + ret = deregister_vpa(tcpu, vpa); break; case FLAGS_REGISTER_SLBSHADOW: - ret = register_slb_shadow(tenv, vpa); + ret = register_slb_shadow(tcpu, vpa); break; case FLAGS_DEREGISTER_SLBSHADOW: - ret = deregister_slb_shadow(tenv, vpa); + ret = deregister_slb_shadow(tcpu, vpa); break; case FLAGS_REGISTER_DTL: - ret = register_dtl(tenv, vpa); + ret = register_dtl(tcpu, vpa); break; case FLAGS_DEREGISTER_DTL: - ret = deregister_dtl(tenv, vpa); + ret = deregister_dtl(tcpu, vpa); break; } @@ -1547,6 +1554,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, error_report_err(local_err); return H_HARDWARE; } + error_free(local_err); local_err = NULL; } } diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 0ee779f..b984d2d 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -572,23 +572,36 @@ typedef struct IDRegState { MemoryRegion mem; } IDRegState; -static void idreg_init1(Object *obj) +static void idreg_realize(DeviceState *ds, Error **errp) { - IDRegState *s = MACIO_ID_REGISTER(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + IDRegState *s = MACIO_ID_REGISTER(ds); + SysBusDevice *dev = SYS_BUS_DEVICE(ds); + Error *local_err = NULL; + + memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.idreg", + sizeof(idreg_data), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - memory_region_init_ram_nomigrate(&s->mem, obj, - "sun4m.idreg", sizeof(idreg_data), &error_fatal); vmstate_register_ram_global(&s->mem); memory_region_set_readonly(&s->mem, true); sysbus_init_mmio(dev, &s->mem); } +static void idreg_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = idreg_realize; +} + static const TypeInfo idreg_info = { .name = TYPE_MACIO_ID_REGISTER, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(IDRegState), - .instance_init = idreg_init1, + .class_init = idreg_class_init, }; #define TYPE_TCX_AFX "tcx_afx" @@ -613,21 +626,35 @@ static void afx_init(hwaddr addr) sysbus_mmio_map(s, 0, addr); } -static void afx_init1(Object *obj) +static void afx_realize(DeviceState *ds, Error **errp) { - AFXState *s = TCX_AFX(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + AFXState *s = TCX_AFX(ds); + SysBusDevice *dev = SYS_BUS_DEVICE(ds); + Error *local_err = NULL; + + memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.afx", 4, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - memory_region_init_ram_nomigrate(&s->mem, obj, "sun4m.afx", 4, &error_fatal); vmstate_register_ram_global(&s->mem); sysbus_init_mmio(dev, &s->mem); } +static void afx_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = afx_realize; +} + static const TypeInfo afx_info = { .name = TYPE_TCX_AFX, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(AFXState), - .instance_init = afx_init1, + .class_init = afx_class_init, }; #define TYPE_OPENPROM "openprom" @@ -680,13 +707,19 @@ static void prom_init(hwaddr addr, const char *bios_name) } } -static void prom_init1(Object *obj) +static void prom_realize(DeviceState *ds, Error **errp) { - PROMState *s = OPENPROM(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + PROMState *s = OPENPROM(ds); + SysBusDevice *dev = SYS_BUS_DEVICE(ds); + Error *local_err = NULL; + + memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4m.prom", + PROM_SIZE_MAX, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - memory_region_init_ram_nomigrate(&s->prom, obj, "sun4m.prom", PROM_SIZE_MAX, - &error_fatal); vmstate_register_ram_global(&s->prom); memory_region_set_readonly(&s->prom, true); sysbus_init_mmio(dev, &s->prom); @@ -701,6 +734,7 @@ static void prom_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->props = prom_properties; + dc->realize = prom_realize; } static const TypeInfo prom_info = { @@ -708,7 +742,6 @@ static const TypeInfo prom_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(PROMState), .class_init = prom_class_init, - .instance_init = prom_init1, }; #define TYPE_SUN4M_MEMORY "memory" diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 1bede85..3975a7b 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -425,13 +425,19 @@ static void prom_init(hwaddr addr, const char *bios_name) } } -static void prom_init1(Object *obj) +static void prom_realize(DeviceState *ds, Error **errp) { - PROMState *s = OPENPROM(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + PROMState *s = OPENPROM(ds); + SysBusDevice *dev = SYS_BUS_DEVICE(ds); + Error *local_err = NULL; + + memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4u.prom", + PROM_SIZE_MAX, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - memory_region_init_ram_nomigrate(&s->prom, obj, "sun4u.prom", PROM_SIZE_MAX, - &error_fatal); vmstate_register_ram_global(&s->prom); memory_region_set_readonly(&s->prom, true); sysbus_init_mmio(dev, &s->prom); @@ -446,6 +452,7 @@ static void prom_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->props = prom_properties; + dc->realize = prom_realize; } static const TypeInfo prom_info = { @@ -453,7 +460,6 @@ static const TypeInfo prom_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(PROMState), .class_init = prom_class_init, - .instance_init = prom_init1, }; diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index fa546fb..13d0bef 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -1147,7 +1147,6 @@ static void ccid_unrealize(USBDevice *dev, Error **errp) USBCCIDState *s = USB_CCID_DEV(dev); ccid_bulk_in_clear(s); - object_unref(OBJECT(&s->bus)); } static void ccid_flush_pending_answers(USBCCIDState *s) diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index 47b992f..c99398b 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -588,13 +588,6 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = { .load_request = usb_msd_load_request, }; -static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp) -{ - MSDState *s = USB_STORAGE_DEV(dev); - - object_unref(OBJECT(&s->bus)); -} - static void usb_msd_storage_realize(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -642,13 +635,6 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) s->scsi_dev = scsi_dev; } -static void usb_msd_bot_unrealize(USBDevice *dev, Error **errp) -{ - MSDState *s = USB_STORAGE_DEV(dev); - - object_unref(OBJECT(&s->bus)); -} - static void usb_msd_bot_realize(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -712,7 +698,6 @@ static void usb_msd_class_storage_initfn(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_storage_realize; - uc->unrealize = usb_msd_unrealize_storage; dc->props = msd_properties; } @@ -775,7 +760,6 @@ static void usb_msd_class_bot_initfn(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_bot_realize; - uc->unrealize = usb_msd_bot_unrealize; uc->attached_settable = true; } diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index aaf5a88..be566ca 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -896,8 +896,6 @@ static void usb_uas_unrealize(USBDevice *dev, Error **errp) UASDevice *uas = USB_UAS(dev); qemu_bh_delete(uas->status_bh); - - object_unref(OBJECT(&uas->bus)); } static void usb_uas_realize(USBDevice *dev, Error **errp) diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 8c90a2e..c85a62f 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -57,7 +57,8 @@ typedef struct { /** * AIO_WAIT_WHILE: * @wait: the aio wait object - * @ctx: the aio context + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. * @cond: wait while this conditional expression is true * * Wait while a condition is true. Use this to implement synchronous @@ -73,29 +74,27 @@ typedef struct { */ #define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ bool waited_ = false; \ - bool busy_ = true; \ AioWait *wait_ = (wait); \ AioContext *ctx_ = (ctx); \ - if (in_aio_context_home_thread(ctx_)) { \ - while ((cond) || busy_) { \ - busy_ = aio_poll(ctx_, (cond)); \ - waited_ |= !!(cond) | busy_; \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ + waited_ = true; \ } \ } else { \ assert(qemu_get_current_aio_context() == \ qemu_get_aio_context()); \ /* Increment wait_->num_waiters before evaluating cond. */ \ atomic_inc(&wait_->num_waiters); \ - while (busy_) { \ - if ((cond)) { \ - waited_ = busy_ = true; \ + while ((cond)) { \ + if (ctx_) { \ aio_context_release(ctx_); \ - aio_poll(qemu_get_aio_context(), true); \ + } \ + aio_poll(qemu_get_aio_context(), true); \ + if (ctx_) { \ aio_context_acquire(ctx_); \ - } else { \ - busy_ = aio_poll(ctx_, false); \ - waited_ |= busy_; \ } \ + waited_ = true; \ } \ atomic_dec(&wait_->num_waiters); \ } \ diff --git a/include/block/block.h b/include/block/block.h index e677080..b1d6fdb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -421,6 +421,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device, Error **errp); bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); typedef struct BdrvNextIterator { enum { @@ -557,7 +558,8 @@ void bdrv_io_unplug(BlockDriverState *bs); * Begin a quiesced section of all users of @bs. This is part of * bdrv_drained_begin. */ -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents); /** * bdrv_parent_drained_end: @@ -565,7 +567,23 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); * End a quiesced section of all users of @bs. This is part of * bdrv_drained_end. */ -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents); + +/** + * bdrv_drain_poll: + * + * Poll for pending requests in @bs, its parents (except for @ignore_parent), + * and if @recursive is true its children as well (used for subtree drain). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for + * drain_all). + * + * This is part of bdrv_drained_begin. + */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -580,6 +598,15 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); void bdrv_drained_begin(BlockDriverState *bs); /** + * bdrv_do_drained_begin_quiesce: + * + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +/** * Like bdrv_drained_begin, but recursively begins a quiesced section for * exclusive access to all child nodes as well. */ diff --git a/include/block/block_int.h b/include/block/block_int.h index 327e478..74646ed 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -577,6 +577,12 @@ struct BdrvChildRole { * points to. */ bool stay_at_node; + /* If true, the parent is a BlockDriverState and bdrv_next_all_states() + * will return it. This information is used for drain_all, where every node + * will be drained separately, so the drain only needs to be propagated to + * non-BDS parents. */ + bool parent_is_bds; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); @@ -605,6 +611,13 @@ struct BdrvChildRole { void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + /* + * Returns whether the parent has pending requests for the child. This + * callback is polled after .drained_begin() has been called until all + * activity on the child has stopped. + */ + bool (*drained_poll)(BdrvChild *child); + /* Notifies the parent that the child has been activated/inactivated (e.g. * when migration is completing) and it can start/stop requesting * permissions and doing I/O on it. */ @@ -841,6 +854,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +extern unsigned int bdrv_drain_all_count; void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); @@ -1017,6 +1031,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @filter_node_name: The node name that should be assigned to the filter * driver that the mirror job inserts into the graph above @bs. NULL means that * a node name should be autogenerated. + * @copy_mode: When to trigger writes to the target. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated @@ -1030,7 +1045,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, Error **errp); + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp); /* * backup_job_create: diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 5cd50c6..e4a318d 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -39,6 +39,14 @@ struct BlockJobDriver { JobDriver job_driver; /* + * Returns whether the job has pending requests for the child or will + * submit new requests before the next pause point. This callback is polled + * in the context of draining a job node after requesting that the job be + * paused, until all activity on the child has stopped. + */ + bool (*drained_poll)(BlockJob *job); + + /* * If the callback is not NULL, it will be invoked before the job is * resumed in a new AioContext. This is the place to move any resources * besides job->blk to the new AioContext. diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 02e0cba..288dc6a 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -82,6 +82,8 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, + uint64_t *offset, int *bytes); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h new file mode 100644 index 0000000..b33a2c4 --- /dev/null +++ b/include/hw/display/ramfb.h @@ -0,0 +1,12 @@ +#ifndef RAMFB_H +#define RAMFB_H + +/* ramfb.c */ +typedef struct RAMFBState RAMFBState; +void ramfb_display_update(QemuConsole *con, RAMFBState *s); +RAMFBState *ramfb_setup(Error **errp); + +/* ramfb-standalone.c */ +#define TYPE_RAMFB_DEVICE "ramfb" + +#endif /* RAMFB_H */ diff --git a/include/hw/input/adb.h b/include/hw/input/adb.h index 3ae8445..f99d478 100644 --- a/include/hw/input/adb.h +++ b/include/hw/input/adb.h @@ -49,6 +49,7 @@ struct ADBDevice { int devaddr; int handler; + bool disable_direct_reg3_writes; }; #define ADB_DEVICE_CLASS(cls) \ diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h new file mode 100644 index 0000000..2838ae5 --- /dev/null +++ b/include/hw/misc/macio/gpio.h @@ -0,0 +1,47 @@ +/* + * PowerMac NewWorld MacIO GPIO emulation + * + * Copyright (c) 2016 Benjamin Herrenschmidt + * Copyright (c) 2018 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MACIO_GPIO_H +#define MACIO_GPIO_H + +#define TYPE_MACIO_GPIO "macio-gpio" +#define MACIO_GPIO(obj) OBJECT_CHECK(MacIOGPIOState, (obj), TYPE_MACIO_GPIO) + +typedef struct MacIOGPIOState { + /*< private >*/ + SysBusDevice parent; + /*< public >*/ + + OpenPICState *pic; + + MemoryRegion gpiomem; + qemu_irq gpio_extirqs[10]; + uint8_t gpio_levels[8]; + uint8_t gpio_regs[36]; /* XXX Check count */ +} MacIOGPIOState; + +void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state); + +#endif diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h index 838eaf1..cfaa145 100644 --- a/include/hw/misc/macio/macio.h +++ b/include/hw/misc/macio/macio.h @@ -26,8 +26,11 @@ #ifndef MACIO_H #define MACIO_H +#include "hw/char/escc.h" #include "hw/intc/heathrow_pic.h" #include "hw/misc/macio/cuda.h" +#include "hw/misc/macio/gpio.h" +#include "hw/misc/macio/pmu.h" #include "hw/ppc/mac_dbdma.h" #include "hw/ppc/openpic.h" @@ -41,6 +44,7 @@ typedef struct MacIOState { MemoryRegion bar; CUDAState cuda; + PMUState pmu; DBDMAState dbdma; ESCCState escc; uint64_t frequency; @@ -70,8 +74,11 @@ typedef struct NewWorldMacIOState { MacIOState parent_obj; /*< public >*/ + bool has_pmu; + bool has_adb; OpenPICState *pic; MACIOIDEState ide[2]; + MacIOGPIOState gpio; } NewWorldMacIOState; #endif /* MACIO_H */ diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h new file mode 100644 index 0000000..d10895b --- /dev/null +++ b/include/hw/misc/macio/pmu.h @@ -0,0 +1,237 @@ +/* + * Definitions for talking to the PMU. The PMU is a microcontroller + * which controls battery charging and system power on PowerBook 3400 + * and 2400 models as well as the RTC and various other things. + * + * Copyright (C) 1998 Paul Mackerras. + * Copyright (C) 2016 Ben Herrenschmidt + */ + +#ifndef PMU_H +#define PMU_H + +/* + * PMU commands + */ + +#define PMU_POWER_CTRL0 0x10 /* control power of some devices */ +#define PMU_POWER_CTRL 0x11 /* control power of some devices */ +#define PMU_ADB_CMD 0x20 /* send ADB packet */ +#define PMU_ADB_POLL_OFF 0x21 /* disable ADB auto-poll */ +#define PMU_WRITE_NVRAM 0x33 /* write non-volatile RAM */ +#define PMU_READ_NVRAM 0x3b /* read non-volatile RAM */ +#define PMU_SET_RTC 0x30 /* set real-time clock */ +#define PMU_READ_RTC 0x38 /* read real-time clock */ +#define PMU_SET_VOLBUTTON 0x40 /* set volume up/down position */ +#define PMU_BACKLIGHT_BRIGHT 0x41 /* set backlight brightness */ +#define PMU_GET_VOLBUTTON 0x48 /* get volume up/down position */ +#define PMU_PCEJECT 0x4c /* eject PC-card from slot */ +#define PMU_BATTERY_STATE 0x6b /* report battery state etc. */ +#define PMU_SMART_BATTERY_STATE 0x6f /* report battery state (new way) */ +#define PMU_SET_INTR_MASK 0x70 /* set PMU interrupt mask */ +#define PMU_INT_ACK 0x78 /* read interrupt bits */ +#define PMU_SHUTDOWN 0x7e /* turn power off */ +#define PMU_CPU_SPEED 0x7d /* control CPU speed on some models */ +#define PMU_SLEEP 0x7f /* put CPU to sleep */ +#define PMU_POWER_EVENTS 0x8f /* Send power-event commands to PMU */ +#define PMU_I2C_CMD 0x9a /* I2C operations */ +#define PMU_RESET 0xd0 /* reset CPU */ +#define PMU_GET_BRIGHTBUTTON 0xd9 /* report brightness up/down pos */ +#define PMU_GET_COVER 0xdc /* report cover open/closed */ +#define PMU_SYSTEM_READY 0xdf /* tell PMU we are awake */ +#define PMU_DOWNLOAD_STATUS 0xe2 /* Called by MacOS during boot... */ +#define PMU_READ_PMU_RAM 0xe8 /* read the PMU RAM... ??? */ +#define PMU_GET_VERSION 0xea /* read the PMU version */ + +/* Bits to use with the PMU_POWER_CTRL0 command */ +#define PMU_POW0_ON 0x80 /* OR this to power ON the device */ +#define PMU_POW0_OFF 0x00 /* leave bit 7 to 0 to power it OFF */ +#define PMU_POW0_HARD_DRIVE 0x04 /* Hard drive power + * (on wallstreet/lombard ?) */ + +/* Bits to use with the PMU_POWER_CTRL command */ +#define PMU_POW_ON 0x80 /* OR this to power ON the device */ +#define PMU_POW_OFF 0x00 /* leave bit 7 to 0 to power it OFF */ +#define PMU_POW_BACKLIGHT 0x01 /* backlight power */ +#define PMU_POW_CHARGER 0x02 /* battery charger power */ +#define PMU_POW_IRLED 0x04 /* IR led power (on wallstreet) */ +#define PMU_POW_MEDIABAY 0x08 /* media bay power + * (wallstreet/lombard ?) */ + +/* Bits in PMU interrupt and interrupt mask bytes */ +#define PMU_INT_PCEJECT 0x04 /* PC-card eject buttons */ +#define PMU_INT_SNDBRT 0x08 /* sound/brightness up/down buttons */ +#define PMU_INT_ADB 0x10 /* ADB autopoll or reply data */ +#define PMU_INT_BATTERY 0x20 /* Battery state change */ +#define PMU_INT_ENVIRONMENT 0x40 /* Environment interrupts */ +#define PMU_INT_TICK 0x80 /* 1-second tick interrupt */ + +/* Other bits in PMU interrupt valid when PMU_INT_ADB is set */ +#define PMU_INT_ADB_AUTO 0x04 /* ADB autopoll, when PMU_INT_ADB */ +#define PMU_INT_WAITING_CHARGER 0x01 /* ??? */ +#define PMU_INT_AUTO_SRQ_POLL 0x02 /* ??? */ + +/* Bits in the environement message (either obtained via PMU_GET_COVER, + * or via PMU_INT_ENVIRONMENT on core99 */ +#define PMU_ENV_LID_CLOSED 0x01 /* The lid is closed */ + +/* I2C related definitions */ +#define PMU_I2C_MODE_SIMPLE 0 +#define PMU_I2C_MODE_STDSUB 1 +#define PMU_I2C_MODE_COMBINED 2 + +#define PMU_I2C_BUS_STATUS 0 +#define PMU_I2C_BUS_SYSCLK 1 +#define PMU_I2C_BUS_POWER 2 + +#define PMU_I2C_STATUS_OK 0 +#define PMU_I2C_STATUS_DATAREAD 1 +#define PMU_I2C_STATUS_BUSY 0xfe + +/* Kind of PMU (model) */ +enum { + PMU_UNKNOWN, + PMU_OHARE_BASED, /* 2400, 3400, 3500 (old G3 powerbook) */ + PMU_HEATHROW_BASED, /* PowerBook G3 series */ + PMU_PADDINGTON_BASED, /* 1999 PowerBook G3 */ + PMU_KEYLARGO_BASED, /* Core99 motherboard (PMU99) */ + PMU_68K_V1, /* 68K PMU, version 1 */ + PMU_68K_V2, /* 68K PMU, version 2 */ +}; + +/* PMU PMU_POWER_EVENTS commands */ +enum { + PMU_PWR_GET_POWERUP_EVENTS = 0x00, + PMU_PWR_SET_POWERUP_EVENTS = 0x01, + PMU_PWR_CLR_POWERUP_EVENTS = 0x02, + PMU_PWR_GET_WAKEUP_EVENTS = 0x03, + PMU_PWR_SET_WAKEUP_EVENTS = 0x04, + PMU_PWR_CLR_WAKEUP_EVENTS = 0x05, +}; + +/* Power events wakeup bits */ +enum { + PMU_PWR_WAKEUP_KEY = 0x01, /* Wake on key press */ + PMU_PWR_WAKEUP_AC_INSERT = 0x02, /* Wake on AC adapter plug */ + PMU_PWR_WAKEUP_AC_CHANGE = 0x04, + PMU_PWR_WAKEUP_LID_OPEN = 0x08, + PMU_PWR_WAKEUP_RING = 0x10, +}; + +/* + * This table indicates for each PMU opcode: + * - the number of data bytes to be sent with the command, or -1 + * if a length byte should be sent, + * - the number of response bytes which the PMU will return, or + * -1 if it will send a length byte. + */ + +static const int8_t pmu_data_len[256][2] = { +/* 0 1 2 3 4 5 6 7 */ + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0, 0}, + {-1, 0},{ 0, 0},{ 2, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0, -1}, + { 4, 0},{20, 0},{-1, 0},{ 3, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 4},{ 0, 20},{ 2, -1},{ 2, 1},{ 3, -1},{-1, -1},{-1, -1},{ 4, 0}, + { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{ 1, 0},{ 1, 0},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 0, 0},{ 2, 0},{ 2, 0},{-1, 0},{ 1, 0},{ 3, 0},{ 1, 0}, + { 0, 1},{ 1, 0},{ 0, 2},{ 0, 2},{ 0, -1},{-1, -1},{-1, -1},{-1, -1}, + { 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 3},{ 0, 3},{ 0, 2},{ 0, 8},{ 0, -1},{ 0, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{ 5, 1},{ 4, 1},{ 4, 1}, + { 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 5},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 2, 0},{ 2, 0},{ 2, 0},{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0}, + { 1, 1},{ 1, 0},{ 3, 0},{ 2, 0},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 1, 1},{ 1, 1},{-1, -1},{-1, -1},{ 0, 1},{ 0, -1},{-1, -1},{-1, -1}, + {-1, 0},{ 4, 0},{ 0, 1},{-1, 0},{-1, 0},{ 4, 0},{-1, 0},{-1, 0}, + { 3, -1},{-1, -1},{ 0, 1},{-1, -1},{ 0, -1},{-1, -1},{-1, -1},{ 0, 0}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, +}; + +/* Command protocol state machine */ +typedef enum { + pmu_state_idle, /* Waiting for command */ + pmu_state_cmd, /* Receiving command */ + pmu_state_rsp, /* Responding to command */ +} PMUCmdState; + +/* MOS6522 PMU */ +typedef struct MOS6522PMUState { + /*< private >*/ + MOS6522State parent_obj; +} MOS6522PMUState; + +#define TYPE_MOS6522_PMU "mos6522-pmu" +#define MOS6522_PMU(obj) OBJECT_CHECK(MOS6522PMUState, (obj), \ + TYPE_MOS6522_PMU) +/** + * PMUState: + * @last_b: last value of B register + */ + +typedef struct PMUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion mem; + uint64_t frequency; + qemu_irq via_irq; + bool via_irq_state; + + /* PMU state */ + MOS6522PMUState mos6522_pmu; + + /* PMU low level protocol state */ + PMUCmdState cmd_state; + uint8_t last_b; + uint8_t cmd; + uint32_t cmdlen; + uint32_t rsplen; + uint8_t cmd_buf_pos; + uint8_t cmd_buf[128]; + uint8_t cmd_rsp_pos; + uint8_t cmd_rsp_sz; + uint8_t cmd_rsp[128]; + + /* PMU events/interrupts */ + uint8_t intbits; + uint8_t intmask; + + /* ADB */ + bool has_adb; + ADBBusState adb_bus; + uint16_t adb_poll_mask; + uint8_t autopoll_rate_ms; + uint8_t autopoll_mask; + QEMUTimer *adb_poll_timer; + uint8_t adb_reply_size; + uint8_t adb_reply[ADB_MAX_OUT_LEN]; + + /* RTC */ + uint32_t tick_offset; + QEMUTimer *one_sec_timer; + int64_t one_sec_target; + + /* GPIO */ + MacIOGPIOState *gpio; +} PMUState; + +#define TYPE_VIA_PMU "via-pmu" +#define VIA_PMU(obj) OBJECT_CHECK(PMUState, (obj), TYPE_VIA_PMU) + +#endif /* PMU_H */ diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h index f52b419..03d9f0c 100644 --- a/include/hw/misc/mos6522.h +++ b/include/hw/misc/mos6522.h @@ -134,6 +134,7 @@ typedef struct MOS6522DeviceClass { void (*set_sr_int)(MOS6522State *dev); void (*portB_write)(MOS6522State *dev); void (*portA_write)(MOS6522State *dev); + void (*update_irq)(MOS6522State *dev); /* These are used to influence the CUDA MacOS timebase calibration */ uint64_t (*get_timer1_counter_value)(MOS6522State *dev, MOS6522Timer *ti); uint64_t (*get_timer2_counter_value)(MOS6522State *dev, MOS6522Timer *ti); diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index e337af7..447ae76 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -34,7 +34,7 @@ typedef struct PnvCore { CPUCore parent_obj; /*< public >*/ - void *threads; + PowerPCCPU **threads; uint32_t pir; MemoryRegion xscom_regs; diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index b18ef3e..298ec35 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -101,6 +101,7 @@ enum { #define FW_CFG_PPC_NVRAM_ADDR (FW_CFG_ARCH_LOCAL + 0x08) #define FW_CFG_PPC_BUSFREQ (FW_CFG_ARCH_LOCAL + 0x09) #define FW_CFG_PPC_NVRAM_FLAT (FW_CFG_ARCH_LOCAL + 0x0a) +#define FW_CFG_PPC_VIACONFIG (FW_CFG_ARCH_LOCAL + 0x0b) #define PPC_SERIAL_MM_BAUDBASE 399193 diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index 47dcfda..8ceea29 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -41,4 +41,15 @@ typedef struct sPAPRCPUCoreClass { const char *spapr_get_cpu_core_type(const char *cpu_type); void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r3); +typedef struct sPAPRCPUState { + uint64_t vpa_addr; + uint64_t slb_shadow_addr, slb_shadow_size; + uint64_t dtl_addr, dtl_size; +} sPAPRCPUState; + +static inline sPAPRCPUState *spapr_cpu_state(PowerPCCPU *cpu) +{ + return (sPAPRCPUState *)cpu->machine_data; +} + #endif diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 6b6490e..ddca52c 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -324,11 +324,14 @@ void hbitmap_free_meta(HBitmap *hb); /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. + * @advance: If true, advance the iterator. Otherwise, the next call + * of this function will return the same result (if that + * position is still dirty). * * Return the next bit that is set in @hbi's associated HBitmap, * or -1 if all remaining bits are zero. */ -int64_t hbitmap_iter_next(HBitmapIter *hbi); +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance); /** * hbitmap_iter_next_word: diff --git a/include/qemu/job.h b/include/qemu/job.h index 1d82053..18c9223 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -335,6 +335,21 @@ void job_progress_update(Job *job, uint64_t done); */ void job_progress_set_remaining(Job *job, uint64_t remaining); +/** + * @job: The job whose expected progress end value is updated + * @delta: Value which is to be added to the current expected end + * value + * + * Increases the expected end value of the progress counter of a job. + * This is useful for parenthesis operations: If a job has to + * conditionally perform a high-priority operation as part of its + * progress, it calls this function with the expected operation's + * length before, and job_progress_update() afterwards. + * (So the operation acts as a parenthesis in regards to the main job + * operation running in background.) + */ +void job_progress_increase_remaining(Job *job, uint64_t delta); + /** To be called when a cancelled job is finalised. */ void job_event_cancelled(Job *job); @@ -385,6 +385,11 @@ void job_progress_set_remaining(Job *job, uint64_t remaining) job->progress_total = job->progress_current + remaining; } +void job_progress_increase_remaining(Job *job, uint64_t delta) +{ + job->progress_total += delta; +} + void job_event_cancelled(Job *job) { notifier_list_notify(&job->on_finalize_cancelled, job); @@ -207,22 +207,35 @@ struct Monitor { bool skip_flush; bool use_io_thr; - /* We can't access guest memory when holding the lock */ - QemuMutex out_lock; - QString *outbuf; - guint out_watch; - - /* Read under either BQL or out_lock, written with BQL+out_lock. */ - int mux_out; - + /* + * State used only in the thread "owning" the monitor. + * If @use_io_thr, this is mon_global.mon_iothread. + * Else, it's the main thread. + * These members can be safely accessed without locks. + */ ReadLineState *rs; + MonitorQMP qmp; gchar *mon_cpu_path; BlockCompletionFunc *password_completion_cb; void *password_opaque; mon_cmd_t *cmd_table; - QLIST_HEAD(,mon_fd_t) fds; QTAILQ_ENTRY(Monitor) entry; + + /* + * The per-monitor lock. We can't access guest memory when holding + * the lock. + */ + QemuMutex mon_lock; + + /* + * Fields that are protected by the per-monitor lock. + */ + QLIST_HEAD(, mon_fd_t) fds; + QString *outbuf; + guint out_watch; + /* Read under either BQL or mon_lock, written with BQL+mon_lock. */ + int mux_out; }; /* Let's add monitor global variables to this struct. */ @@ -253,11 +266,15 @@ typedef struct QMPRequest QMPRequest; /* QMP checker flags */ #define QMP_ACCEPT_UNKNOWNS 1 -/* Protects mon_list, monitor_event_state. */ +/* Protects mon_list, monitor_qapi_event_state. */ static QemuMutex monitor_lock; - +static GHashTable *monitor_qapi_event_state; static QTAILQ_HEAD(mon_list, Monitor) mon_list; + +/* Protects mon_fdsets */ +static QemuMutex mon_fdsets_lock; static QLIST_HEAD(mon_fdsets, MonFdset) mon_fdsets; + static int mon_refcount; static mon_cmd_t mon_cmds[]; @@ -267,8 +284,6 @@ QmpCommandList qmp_commands, qmp_cap_negotiation_commands; Monitor *cur_mon; -static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME; - static void monitor_command_cb(void *opaque, const char *cmdline, void *readline_opaque); @@ -295,6 +310,19 @@ static inline bool monitor_is_hmp_non_interactive(const Monitor *mon) return !monitor_is_qmp(mon) && !monitor_uses_readline(mon); } +/* + * Return the clock to use for recording an event's time. + * Beware: result is invalid before configure_accelerator(). + */ +static inline QEMUClockType monitor_get_event_clock(void) +{ + /* + * This allows us to perform tests on the monitor queues to verify + * that the rate limits are enforced. + */ + return qtest_enabled() ? QEMU_CLOCK_VIRTUAL : QEMU_CLOCK_REALTIME; +} + /** * Is the current monitor, if any, a QMP monitor? */ @@ -365,14 +393,14 @@ static gboolean monitor_unblocked(GIOChannel *chan, GIOCondition cond, { Monitor *mon = opaque; - qemu_mutex_lock(&mon->out_lock); + qemu_mutex_lock(&mon->mon_lock); mon->out_watch = 0; monitor_flush_locked(mon); - qemu_mutex_unlock(&mon->out_lock); + qemu_mutex_unlock(&mon->mon_lock); return FALSE; } -/* Called with mon->out_lock held. */ +/* Called with mon->mon_lock held. */ static void monitor_flush_locked(Monitor *mon) { int rc; @@ -410,9 +438,9 @@ static void monitor_flush_locked(Monitor *mon) void monitor_flush(Monitor *mon) { - qemu_mutex_lock(&mon->out_lock); + qemu_mutex_lock(&mon->mon_lock); monitor_flush_locked(mon); - qemu_mutex_unlock(&mon->out_lock); + qemu_mutex_unlock(&mon->mon_lock); } /* flush at every end of line */ @@ -420,7 +448,7 @@ static void monitor_puts(Monitor *mon, const char *str) { char c; - qemu_mutex_lock(&mon->out_lock); + qemu_mutex_lock(&mon->mon_lock); for(;;) { c = *str++; if (c == '\0') @@ -433,7 +461,7 @@ static void monitor_puts(Monitor *mon, const char *str) monitor_flush_locked(mon); } } - qemu_mutex_unlock(&mon->out_lock); + qemu_mutex_unlock(&mon->mon_lock); } void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) @@ -558,8 +586,6 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { [QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS }, }; -GHashTable *monitor_qapi_event_state; - /* * Emits the event to every monitor instance, @event is only used for trace * Called with monitor_lock held. @@ -620,7 +646,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) * monitor_qapi_event_handler() in evconf->rate ns. Any * events arriving before then will be delayed until then. */ - int64_t now = qemu_clock_get_ns(event_clock_type); + int64_t now = qemu_clock_get_ns(monitor_get_event_clock()); monitor_qapi_event_emit(event, qdict); @@ -628,7 +654,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) evstate->event = event; evstate->data = qobject_ref(data); evstate->qdict = NULL; - evstate->timer = timer_new_ns(event_clock_type, + evstate->timer = timer_new_ns(monitor_get_event_clock(), monitor_qapi_event_handler, evstate); g_hash_table_add(monitor_qapi_event_state, evstate); @@ -653,7 +679,7 @@ static void monitor_qapi_event_handler(void *opaque) qemu_mutex_lock(&monitor_lock); if (evstate->qdict) { - int64_t now = qemu_clock_get_ns(event_clock_type); + int64_t now = qemu_clock_get_ns(monitor_get_event_clock()); monitor_qapi_event_emit(evstate->event, evstate->qdict); qobject_unref(evstate->qdict); @@ -709,10 +735,6 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b) static void monitor_qapi_event_init(void) { - if (qtest_enabled()) { - event_clock_type = QEMU_CLOCK_VIRTUAL; - } - monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash, qapi_event_throttle_equal); qmp_event_set_func_emit(monitor_qapi_event_queue); @@ -724,7 +746,7 @@ static void monitor_data_init(Monitor *mon, bool skip_flush, bool use_io_thr) { memset(mon, 0, sizeof(Monitor)); - qemu_mutex_init(&mon->out_lock); + qemu_mutex_init(&mon->mon_lock); qemu_mutex_init(&mon->qmp.qmp_queue_lock); mon->outbuf = qstring_new(); /* Use *mon_cmds by default. */ @@ -744,7 +766,7 @@ static void monitor_data_destroy(Monitor *mon) } readline_free(mon->rs); qobject_unref(mon->outbuf); - qemu_mutex_destroy(&mon->out_lock); + qemu_mutex_destroy(&mon->mon_lock); qemu_mutex_destroy(&mon->qmp.qmp_queue_lock); monitor_qmp_cleanup_req_queue_locked(mon); monitor_qmp_cleanup_resp_queue_locked(mon); @@ -776,13 +798,13 @@ char *qmp_human_monitor_command(const char *command_line, bool has_cpu_index, handle_hmp_command(&hmp, command_line); cur_mon = old_mon; - qemu_mutex_lock(&hmp.out_lock); + qemu_mutex_lock(&hmp.mon_lock); if (qstring_get_length(hmp.outbuf) > 0) { output = g_strdup(qstring_get_str(hmp.outbuf)); } else { output = g_strdup(""); } - qemu_mutex_unlock(&hmp.out_lock); + qemu_mutex_unlock(&hmp.mon_lock); out: monitor_data_destroy(&hmp); @@ -1306,7 +1328,7 @@ void qmp_qmp_capabilities(bool has_enable, QMPCapabilityList *enable, cur_mon->qmp.commands = &qmp_commands; } -/* set the current CPU defined by the user */ +/* Set the current CPU defined by the user. Callers must hold BQL. */ int monitor_set_cpu(int cpu_index) { CPUState *cpu; @@ -1320,6 +1342,7 @@ int monitor_set_cpu(int cpu_index) return 0; } +/* Callers must hold BQL. */ static CPUState *mon_get_cpu_sync(bool synchronize) { CPUState *cpu; @@ -2182,7 +2205,7 @@ static void hmp_acl_remove(Monitor *mon, const QDict *qdict) void qmp_getfd(const char *fdname, Error **errp) { mon_fd_t *monfd; - int fd; + int fd, tmp_fd; fd = qemu_chr_fe_get_msgfd(&cur_mon->chr); if (fd == -1) { @@ -2197,13 +2220,17 @@ void qmp_getfd(const char *fdname, Error **errp) return; } + qemu_mutex_lock(&cur_mon->mon_lock); QLIST_FOREACH(monfd, &cur_mon->fds, next) { if (strcmp(monfd->name, fdname) != 0) { continue; } - close(monfd->fd); + tmp_fd = monfd->fd; monfd->fd = fd; + qemu_mutex_unlock(&cur_mon->mon_lock); + /* Make sure close() is out of critical section */ + close(tmp_fd); return; } @@ -2212,24 +2239,31 @@ void qmp_getfd(const char *fdname, Error **errp) monfd->fd = fd; QLIST_INSERT_HEAD(&cur_mon->fds, monfd, next); + qemu_mutex_unlock(&cur_mon->mon_lock); } void qmp_closefd(const char *fdname, Error **errp) { mon_fd_t *monfd; + int tmp_fd; + qemu_mutex_lock(&cur_mon->mon_lock); QLIST_FOREACH(monfd, &cur_mon->fds, next) { if (strcmp(monfd->name, fdname) != 0) { continue; } QLIST_REMOVE(monfd, next); - close(monfd->fd); + tmp_fd = monfd->fd; g_free(monfd->name); g_free(monfd); + qemu_mutex_unlock(&cur_mon->mon_lock); + /* Make sure close() is out of critical section */ + close(tmp_fd); return; } + qemu_mutex_unlock(&cur_mon->mon_lock); error_setg(errp, QERR_FD_NOT_FOUND, fdname); } @@ -2237,6 +2271,7 @@ int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp) { mon_fd_t *monfd; + qemu_mutex_lock(&mon->mon_lock); QLIST_FOREACH(monfd, &mon->fds, next) { int fd; @@ -2250,10 +2285,12 @@ int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp) QLIST_REMOVE(monfd, next); g_free(monfd->name); g_free(monfd); + qemu_mutex_unlock(&mon->mon_lock); return fd; } + qemu_mutex_unlock(&mon->mon_lock); error_setg(errp, "File descriptor named '%s' has not been found", fdname); return -1; } @@ -2285,9 +2322,11 @@ static void monitor_fdsets_cleanup(void) MonFdset *mon_fdset; MonFdset *mon_fdset_next; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) { monitor_fdset_cleanup(mon_fdset); } + qemu_mutex_unlock(&mon_fdsets_lock); } AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, @@ -2322,6 +2361,7 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp) MonFdsetFd *mon_fdset_fd; char fd_str[60]; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { if (mon_fdset->id != fdset_id) { continue; @@ -2341,10 +2381,12 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp) goto error; } monitor_fdset_cleanup(mon_fdset); + qemu_mutex_unlock(&mon_fdsets_lock); return; } error: + qemu_mutex_unlock(&mon_fdsets_lock); if (has_fd) { snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64, fdset_id, fd); @@ -2360,6 +2402,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) MonFdsetFd *mon_fdset_fd; FdsetInfoList *fdset_list = NULL; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { FdsetInfoList *fdset_info = g_malloc0(sizeof(*fdset_info)); FdsetFdInfoList *fdsetfd_list = NULL; @@ -2389,6 +2432,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) fdset_info->next = fdset_list; fdset_list = fdset_info; } + qemu_mutex_unlock(&mon_fdsets_lock); return fdset_list; } @@ -2401,6 +2445,7 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, MonFdsetFd *mon_fdset_fd; AddfdInfo *fdinfo; + qemu_mutex_lock(&mon_fdsets_lock); if (has_fdset_id) { QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { /* Break if match found or match impossible due to ordering by ID */ @@ -2421,6 +2466,7 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, if (fdset_id < 0) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", "a non-negative value"); + qemu_mutex_unlock(&mon_fdsets_lock); return NULL; } /* Use specified fdset ID */ @@ -2471,16 +2517,21 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, fdinfo->fdset_id = mon_fdset->id; fdinfo->fd = mon_fdset_fd->fd; + qemu_mutex_unlock(&mon_fdsets_lock); return fdinfo; } int monitor_fdset_get_fd(int64_t fdset_id, int flags) { -#ifndef _WIN32 +#ifdef _WIN32 + return -ENOENT; +#else MonFdset *mon_fdset; MonFdsetFd *mon_fdset_fd; int mon_fd_flags; + int ret; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { if (mon_fdset->id != fdset_id) { continue; @@ -2488,20 +2539,24 @@ int monitor_fdset_get_fd(int64_t fdset_id, int flags) QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) { mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL); if (mon_fd_flags == -1) { - return -1; + ret = -errno; + goto out; } if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) { - return mon_fdset_fd->fd; + ret = mon_fdset_fd->fd; + goto out; } } - errno = EACCES; - return -1; + ret = -EACCES; + goto out; } -#endif + ret = -ENOENT; - errno = ENOENT; - return -1; +out: + qemu_mutex_unlock(&mon_fdsets_lock); + return ret; +#endif } int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd) @@ -2509,20 +2564,25 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd) MonFdset *mon_fdset; MonFdsetFd *mon_fdset_fd_dup; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { if (mon_fdset->id != fdset_id) { continue; } QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) { if (mon_fdset_fd_dup->fd == dup_fd) { - return -1; + goto err; } } mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup)); mon_fdset_fd_dup->fd = dup_fd; QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next); + qemu_mutex_unlock(&mon_fdsets_lock); return 0; } + +err: + qemu_mutex_unlock(&mon_fdsets_lock); return -1; } @@ -2531,6 +2591,7 @@ static int monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) MonFdset *mon_fdset; MonFdsetFd *mon_fdset_fd_dup; + qemu_mutex_lock(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) { if (mon_fdset_fd_dup->fd == dup_fd) { @@ -2539,13 +2600,17 @@ static int monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) if (QLIST_EMPTY(&mon_fdset->dup_fds)) { monitor_fdset_cleanup(mon_fdset); } - return -1; + goto err; } else { + qemu_mutex_unlock(&mon_fdsets_lock); return mon_fdset->id; } } } } + +err: + qemu_mutex_unlock(&mon_fdsets_lock); return -1; } @@ -4381,9 +4446,9 @@ static void monitor_event(void *opaque, int event) switch (event) { case CHR_EVENT_MUX_IN: - qemu_mutex_lock(&mon->out_lock); + qemu_mutex_lock(&mon->mon_lock); mon->mux_out = 0; - qemu_mutex_unlock(&mon->out_lock); + qemu_mutex_unlock(&mon->mon_lock); if (mon->reset_seen) { readline_restart(mon->rs); monitor_resume(mon); @@ -4403,9 +4468,9 @@ static void monitor_event(void *opaque, int event) } else { atomic_inc(&mon->suspend_cnt); } - qemu_mutex_lock(&mon->out_lock); + qemu_mutex_lock(&mon->mon_lock); mon->mux_out = 1; - qemu_mutex_unlock(&mon->out_lock); + qemu_mutex_unlock(&mon->mon_lock); break; case CHR_EVENT_OPENED: @@ -4485,6 +4550,7 @@ void monitor_init_globals(void) monitor_qapi_event_init(); sortcmdlist(); qemu_mutex_init(&monitor_lock); + qemu_mutex_init(&mon_fdsets_lock); monitor_iothread_init(); } diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex af70c36..a39cbe5 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex 9e736e8..7163ba8 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex 82ea0f8..0a9a338 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 diff --git a/qapi/block-core.json b/qapi/block-core.json index ab629d1..cc3ede0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1051,6 +1051,24 @@ 'data': ['top', 'full', 'none', 'incremental'] } ## +# @MirrorCopyMode: +# +# An enumeration whose values tell the mirror block job when to +# trigger writes to the target. +# +# @background: copy data in background only. +# +# @write-blocking: when data is written to the source, write it +# (synchronously) to the target as well. In +# addition, data is copied in background just like in +# @background mode. +# +# Since: 3.0 +## +{ 'enum': 'MirrorCopyMode', + 'data': ['background', 'write-blocking'] } + +## # @BlockJobInfo: # # Information about a long-running block device operation. @@ -1692,6 +1710,9 @@ # written. Both will result in identical contents. # Default is true. (Since 2.4) # +# @copy-mode: when to copy data to the destination; defaults to 'background' +# (Since: 3.0) +# # Since: 1.3 ## { 'struct': 'DriveMirror', @@ -1701,7 +1722,7 @@ '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError', - '*unmap': 'bool' } } + '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } } ## # @BlockDirtyBitmap: @@ -1964,6 +1985,9 @@ # above @device. If this option is not given, a node name is # autogenerated. (Since: 2.9) # +# @copy-mode: when to copy data to the destination; defaults to 'background' +# (Since: 3.0) +# # Returns: nothing on success. # # Since: 2.6 @@ -1984,7 +2008,8 @@ '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError', - '*filter-node-name': 'str' } } + '*filter-node-name': 'str', + '*copy-mode': 'MirrorCopyMode' } } ## # @block_set_io_throttle: diff --git a/roms/openbios b/roms/openbios -Subproject 54d959d97fb331708767b2fd4a878efd2bbc41b +Subproject 8fe6f5f96f6ca39f1f62200be7fa130e929f13f diff --git a/scripts/qemu.py b/scripts/qemu.py index 08a3e9a..f099ce7 100644 --- a/scripts/qemu.py +++ b/scripts/qemu.py @@ -17,19 +17,41 @@ import logging import os import subprocess import qmp.qmp +import re import shutil +import socket import tempfile LOG = logging.getLogger(__name__) +#: Maps machine types to the preferred console device types +CONSOLE_DEV_TYPES = { + r'^clipper$': 'isa-serial', + r'^malta': 'isa-serial', + r'^(pc.*|q35.*|isapc)$': 'isa-serial', + r'^(40p|powernv|prep)$': 'isa-serial', + r'^pseries.*': 'spapr-vty', + r'^s390-ccw-virtio.*': 'sclpconsole', + } + + class QEMUMachineError(Exception): """ Exception called when an error in QEMUMachine happens. """ +class QEMUMachineAddDeviceError(QEMUMachineError): + """ + Exception raised when a request to add a device can not be fulfilled + + The failures are caused by limitations, lack of information or conflicting + requests on the QEMUMachine methods. This exception does not represent + failures reported by the QEMU binary itself. + """ + class MonitorResponseError(qmp.qmp.QMPError): ''' Represents erroneous QMP monitor reply @@ -91,6 +113,10 @@ class QEMUMachine(object): self._test_dir = test_dir self._temp_dir = None self._launched = False + self._machine = None + self._console_device_type = None + self._console_address = None + self._console_socket = None # just in case logging wasn't configured by the main script: logging.basicConfig() @@ -175,9 +201,19 @@ class QEMUMachine(object): self._monitor_address[1]) else: moncdev = 'socket,id=mon,path=%s' % self._vm_monitor - return ['-chardev', moncdev, + args = ['-chardev', moncdev, '-mon', 'chardev=mon,mode=control', '-display', 'none', '-vga', 'none'] + if self._machine is not None: + args.extend(['-machine', self._machine]) + if self._console_device_type is not None: + self._console_address = os.path.join(self._temp_dir, + self._name + "-console.sock") + chardev = ('socket,id=console,path=%s,server,nowait' % + self._console_address) + device = '%s,chardev=console' % self._console_device_type + args.extend(['-chardev', chardev, '-device', device]) + return args def _pre_launch(self): self._temp_dir = tempfile.mkdtemp(dir=self._test_dir) @@ -202,6 +238,10 @@ class QEMUMachine(object): self._qemu_log_path = None + if self._console_socket is not None: + self._console_socket.close() + self._console_socket = None + if self._temp_dir is not None: shutil.rmtree(self._temp_dir) self._temp_dir = None @@ -359,3 +399,64 @@ class QEMUMachine(object): of the qemu process. ''' return self._iolog + + def add_args(self, *args): + ''' + Adds to the list of extra arguments to be given to the QEMU binary + ''' + self._args.extend(args) + + def set_machine(self, machine_type): + ''' + Sets the machine type + + If set, the machine type will be added to the base arguments + of the resulting QEMU command line. + ''' + self._machine = machine_type + + def set_console(self, device_type=None): + ''' + Sets the device type for a console device + + If set, the console device and a backing character device will + be added to the base arguments of the resulting QEMU command + line. + + This is a convenience method that will either use the provided + device type, of if not given, it will used the device type set + on CONSOLE_DEV_TYPES. + + The actual setting of command line arguments will be be done at + machine launch time, as it depends on the temporary directory + to be created. + + @param device_type: the device type, such as "isa-serial" + @raises: QEMUMachineAddDeviceError if the device type is not given + and can not be determined. + ''' + if device_type is None: + if self._machine is None: + raise QEMUMachineAddDeviceError("Can not add a console device:" + " QEMU instance without a " + "defined machine type") + for regex, device in CONSOLE_DEV_TYPES.items(): + if re.match(regex, self._machine): + device_type = device + break + if device_type is None: + raise QEMUMachineAddDeviceError("Can not add a console device:" + " no matching console device " + "type definition") + self._console_device_type = device_type + + @property + def console_socket(self): + """ + Returns a socket connected to the console + """ + if self._console_socket is None: + self._console_socket = socket.socket(socket.AF_UNIX, + socket.SOCK_STREAM) + self._console_socket.connect(self._console_address) + return self._console_socket diff --git a/stubs/fdset.c b/stubs/fdset.c index 6020cf2..4f3edf2 100644 --- a/stubs/fdset.c +++ b/stubs/fdset.c @@ -14,7 +14,7 @@ int monitor_fdset_dup_fd_find(int dup_fd) int monitor_fdset_get_fd(int64_t fdset_id, int flags) { - return -1; + return -ENOENT; } void monitor_fdset_dup_fd_remove(int dupfd) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 0247c1f..874da6e 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1091,12 +1091,6 @@ struct CPUPPCState { target_ulong rmls; #endif -#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) - uint64_t vpa_addr; - uint64_t slb_shadow_addr, slb_shadow_size; - uint64_t dtl_addr, dtl_size; -#endif /* TARGET_PPC64 */ - int error_code; uint32_t pending_interrupts; #if !defined(CONFIG_USER_ONLY) @@ -1205,6 +1199,7 @@ struct PowerPCCPU { uint32_t compat_pvr; PPCVirtualHypervisor *vhyp; Object *intc; + void *machine_data; int32_t node_id; /* NUMA node this CPU belongs to */ PPCHash64Options *hash64_opts; @@ -1300,8 +1295,6 @@ void ppc_store_ptcr(CPUPPCState *env, target_ulong value); void ppc_store_msr (CPUPPCState *env, target_ulong value); void ppc_cpu_list (FILE *f, fprintf_function cpu_fprintf); -#if defined(TARGET_PPC64) -#endif /* Time-base and decrementer management */ #ifndef NO_CPU_IO_DEFS diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 2c0c34e..5c0e313 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -829,22 +829,22 @@ static int kvm_get_fp(CPUState *cs) static int kvm_get_vpa(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); struct kvm_one_reg reg; int ret; reg.id = KVM_REG_PPC_VPA_ADDR; - reg.addr = (uintptr_t)&env->vpa_addr; + reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); return ret; } - assert((uintptr_t)&env->slb_shadow_size - == ((uintptr_t)&env->slb_shadow_addr + 8)); + assert((uintptr_t)&spapr_cpu->slb_shadow_size + == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8)); reg.id = KVM_REG_PPC_VPA_SLB; - reg.addr = (uintptr_t)&env->slb_shadow_addr; + reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr; ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to get SLB shadow state from KVM: %s\n", @@ -852,9 +852,10 @@ static int kvm_get_vpa(CPUState *cs) return ret; } - assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); + assert((uintptr_t)&spapr_cpu->dtl_size + == ((uintptr_t)&spapr_cpu->dtl_addr + 8)); reg.id = KVM_REG_PPC_VPA_DTL; - reg.addr = (uintptr_t)&env->dtl_addr; + reg.addr = (uintptr_t)&spapr_cpu->dtl_addr; ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", @@ -868,7 +869,7 @@ static int kvm_get_vpa(CPUState *cs) static int kvm_put_vpa(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; + sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); struct kvm_one_reg reg; int ret; @@ -876,11 +877,12 @@ static int kvm_put_vpa(CPUState *cs) * registered. That means when restoring state, if a VPA *is* * registered, we need to set that up first. If not, we need to * deregister the others before deregistering the master VPA */ - assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); + assert(spapr_cpu->vpa_addr + || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr)); - if (env->vpa_addr) { + if (spapr_cpu->vpa_addr) { reg.id = KVM_REG_PPC_VPA_ADDR; - reg.addr = (uintptr_t)&env->vpa_addr; + reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); @@ -888,19 +890,20 @@ static int kvm_put_vpa(CPUState *cs) } } - assert((uintptr_t)&env->slb_shadow_size - == ((uintptr_t)&env->slb_shadow_addr + 8)); + assert((uintptr_t)&spapr_cpu->slb_shadow_size + == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8)); reg.id = KVM_REG_PPC_VPA_SLB; - reg.addr = (uintptr_t)&env->slb_shadow_addr; + reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr; ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); return ret; } - assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); + assert((uintptr_t)&spapr_cpu->dtl_size + == ((uintptr_t)&spapr_cpu->dtl_addr + 8)); reg.id = KVM_REG_PPC_VPA_DTL; - reg.addr = (uintptr_t)&env->dtl_addr; + reg.addr = (uintptr_t)&spapr_cpu->dtl_addr; ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", @@ -908,9 +911,9 @@ static int kvm_put_vpa(CPUState *cs) return ret; } - if (!env->vpa_addr) { + if (!spapr_cpu->vpa_addr) { reg.id = KVM_REG_PPC_VPA_ADDR; - reg.addr = (uintptr_t)&env->vpa_addr; + reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret < 0) { DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); @@ -2412,11 +2415,28 @@ bool kvmppc_has_cap_mmu_hash_v3(void) return cap_mmu_hash_v3; } +static bool kvmppc_power8_host(void) +{ + bool ret = false; +#ifdef TARGET_PPC64 + { + uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr(); + ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) || + (base_pvr == CPU_POWERPC_POWER8NVL_BASE) || + (base_pvr == CPU_POWERPC_POWER8_BASE); + } +#endif /* TARGET_PPC64 */ + return ret; +} + static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c) { + bool l1d_thread_priv_req = !kvmppc_power8_host(); + if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { return 2; - } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && + } else if ((!l1d_thread_priv_req || + c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && (c.character & c.character_mask & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { return 1; diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index bb9296f..76d6f3f 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -10316,14 +10316,6 @@ static void ppc_cpu_reset(CPUState *s) s->exception_index = POWERPC_EXCP_NONE; env->error_code = 0; -#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) - env->vpa_addr = 0; - env->slb_shadow_addr = 0; - env->slb_shadow_size = 0; - env->dtl_addr = 0; - env->dtl_size = 0; -#endif /* TARGET_PPC64 */ - for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { ppc_spr_t *spr = &env->spr_cb[i]; diff --git a/target/sparc/translate.c b/target/sparc/translate.c index f3d430c..74315cd 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -41,6 +41,8 @@ #define JUMP_PC 2 /* dynamic pc value which takes only two values according to jump_pc[T2] */ +#define DISAS_EXIT DISAS_TARGET_0 + /* global register indexes */ static TCGv_ptr cpu_regwptr; static TCGv cpu_cc_src, cpu_cc_src2, cpu_cc_dst; @@ -3400,11 +3402,17 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_const = tcg_const_i32(dc->mem_idx); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, tick)); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_get_count(cpu_dst, cpu_env, r_tickptr, r_const); tcg_temp_free_ptr(r_tickptr); tcg_temp_free_i32(r_const); gen_store_gpr(dc, rd, cpu_dst); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } } break; case 0x5: /* V9 rdpc */ @@ -3447,11 +3455,17 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_const = tcg_const_i32(dc->mem_idx); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, stick)); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_get_count(cpu_dst, cpu_env, r_tickptr, r_const); tcg_temp_free_ptr(r_tickptr); tcg_temp_free_i32(r_const); gen_store_gpr(dc, rd, cpu_dst); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } } break; case 0x19: /* System tick compare */ @@ -3576,10 +3590,16 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_const = tcg_const_i32(dc->mem_idx); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, tick)); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_get_count(cpu_tmp0, cpu_env, r_tickptr, r_const); tcg_temp_free_ptr(r_tickptr); tcg_temp_free_i32(r_const); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } } break; case 5: // tba @@ -4385,9 +4405,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_tickptr = tcg_temp_new_ptr(); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, tick)); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_set_limit(r_tickptr, cpu_tick_cmpr); tcg_temp_free_ptr(r_tickptr); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_end(); + } + /* End TB to handle timer interrupt */ + dc->base.is_jmp = DISAS_EXIT; } break; case 0x18: /* System tick */ @@ -4403,9 +4433,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_tickptr = tcg_temp_new_ptr(); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, stick)); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_set_count(r_tickptr, cpu_tmp0); tcg_temp_free_ptr(r_tickptr); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_end(); + } + /* End TB to handle timer interrupt */ + dc->base.is_jmp = DISAS_EXIT; } break; case 0x19: /* System tick compare */ @@ -4421,9 +4461,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_tickptr = tcg_temp_new_ptr(); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, stick)); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_set_limit(r_tickptr, cpu_stick_cmpr); tcg_temp_free_ptr(r_tickptr); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_end(); + } + /* End TB to handle timer interrupt */ + dc->base.is_jmp = DISAS_EXIT; } break; @@ -4531,9 +4581,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_tickptr = tcg_temp_new_ptr(); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, tick)); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_set_count(r_tickptr, cpu_tmp0); tcg_temp_free_ptr(r_tickptr); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_end(); + } + /* End TB to handle timer interrupt */ + dc->base.is_jmp = DISAS_EXIT; } break; case 5: // tba @@ -4541,7 +4601,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) break; case 6: // pstate save_state(dc); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_wrpstate(cpu_env, cpu_tmp0); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } dc->npc = DYNAMIC_PC; break; case 7: // tl @@ -4551,7 +4617,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) dc->npc = DYNAMIC_PC; break; case 8: // pil + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_wrpil(cpu_env, cpu_tmp0); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } break; case 9: // cwp gen_helper_wrcwp(cpu_env, cpu_tmp0); @@ -4642,9 +4714,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) r_tickptr = tcg_temp_new_ptr(); tcg_gen_ld_ptr(r_tickptr, cpu_env, offsetof(CPUSPARCState, hstick)); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_tick_set_limit(r_tickptr, cpu_hstick_cmpr); tcg_temp_free_ptr(r_tickptr); + if (tb_cflags(dc->base.tb) & + CF_USE_ICOUNT) { + gen_io_end(); + } + /* End TB to handle timer interrupt */ + dc->base.is_jmp = DISAS_EXIT; } break; case 6: // hver readonly @@ -5265,14 +5347,26 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) goto priv_insn; dc->npc = DYNAMIC_PC; dc->pc = DYNAMIC_PC; + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_done(cpu_env); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } goto jmp_insn; case 1: if (!supervisor(dc)) goto priv_insn; dc->npc = DYNAMIC_PC; dc->pc = DYNAMIC_PC; + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_retry(cpu_env); + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { + gen_io_end(); + } goto jmp_insn; default: goto illegal_insn; @@ -5822,7 +5916,9 @@ static void sparc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); - if (dc->base.is_jmp != DISAS_NORETURN) { + switch (dc->base.is_jmp) { + case DISAS_NEXT: + case DISAS_TOO_MANY: if (dc->pc != DYNAMIC_PC && (dc->npc != DYNAMIC_PC && dc->npc != JUMP_PC)) { /* static PC and NPC: we can use direct chaining */ @@ -5834,6 +5930,19 @@ static void sparc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) save_npc(dc); tcg_gen_exit_tb(NULL, 0); } + break; + + case DISAS_NORETURN: + break; + + case DISAS_EXIT: + /* Exit TB */ + save_state(dc); + tcg_gen_exit_tb(NULL, 0); + break; + + default: + g_assert_not_reached(); } } diff --git a/tests/acceptance/README.rst b/tests/acceptance/README.rst new file mode 100644 index 0000000..89260fa --- /dev/null +++ b/tests/acceptance/README.rst @@ -0,0 +1,10 @@ +============================================ +Acceptance tests using the Avocado Framework +============================================ + +This directory contains functional tests, also known as acceptance +level tests. They're usually higher level, and may interact with +external resources and with various guest operating systems. + +For more information, please refer to ``docs/devel/testing.rst``, +section "Acceptance tests using the Avocado Framework". diff --git a/tests/acceptance/avocado_qemu/__init__.py b/tests/acceptance/avocado_qemu/__init__.py new file mode 100644 index 0000000..1e54fd5 --- /dev/null +++ b/tests/acceptance/avocado_qemu/__init__.py @@ -0,0 +1,54 @@ +# Test class and utilities for functional tests +# +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +import os +import sys + +import avocado + +SRC_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +SRC_ROOT_DIR = os.path.abspath(os.path.dirname(SRC_ROOT_DIR)) +sys.path.append(os.path.join(SRC_ROOT_DIR, 'scripts')) + +from qemu import QEMUMachine + +def is_readable_executable_file(path): + return os.path.isfile(path) and os.access(path, os.R_OK | os.X_OK) + + +def pick_default_qemu_bin(): + """ + Picks the path of a QEMU binary, starting either in the current working + directory or in the source tree root directory. + """ + arch = os.uname()[4] + qemu_bin_relative_path = os.path.join("%s-softmmu" % arch, + "qemu-system-%s" % arch) + if is_readable_executable_file(qemu_bin_relative_path): + return qemu_bin_relative_path + + qemu_bin_from_src_dir_path = os.path.join(SRC_ROOT_DIR, + qemu_bin_relative_path) + if is_readable_executable_file(qemu_bin_from_src_dir_path): + return qemu_bin_from_src_dir_path + + +class Test(avocado.Test): + def setUp(self): + self.vm = None + self.qemu_bin = self.params.get('qemu_bin', + default=pick_default_qemu_bin()) + if self.qemu_bin is None: + self.cancel("No QEMU binary defined or found in the source tree") + self.vm = QEMUMachine(self.qemu_bin) + + def tearDown(self): + if self.vm is not None: + self.vm.shutdown() diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py new file mode 100644 index 0000000..98324f7 --- /dev/null +++ b/tests/acceptance/boot_linux_console.py @@ -0,0 +1,47 @@ +# Functional test that boots a Linux kernel and checks the console +# +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +import logging + +from avocado_qemu import Test + + +class BootLinuxConsole(Test): + """ + Boots a x86_64 Linux kernel and checks that the console is operational + and the kernel command line is properly passed from QEMU to the kernel + + :avocado: enable + :avocado: tags=x86_64 + """ + + timeout = 60 + + def test(self): + kernel_url = ('https://mirrors.kernel.org/fedora/releases/28/' + 'Everything/x86_64/os/images/pxeboot/vmlinuz') + kernel_hash = '238e083e114c48200f80d889f7e32eeb2793e02a' + kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) + + self.vm.set_machine('pc') + self.vm.set_console() + kernel_command_line = 'console=ttyS0' + self.vm.add_args('-kernel', kernel_path, + '-append', kernel_command_line) + self.vm.launch() + console = self.vm.console_socket.makefile() + console_logger = logging.getLogger('console') + while True: + msg = console.readline() + console_logger.debug(msg.strip()) + if 'Kernel command line: %s' % kernel_command_line in msg: + break + if 'Kernel panic - not syncing' in msg: + self.fail("Kernel panic reached") diff --git a/tests/acceptance/version.py b/tests/acceptance/version.py new file mode 100644 index 0000000..13b0a74 --- /dev/null +++ b/tests/acceptance/version.py @@ -0,0 +1,24 @@ +# Version check example test +# +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + + +from avocado_qemu import Test + + +class Version(Test): + """ + :avocado: enable + :avocado: tags=quick + """ + def test_qmp_human_info_version(self): + self.vm.launch() + res = self.vm.command('human-monitor-command', + command_line='info version') + self.assertRegexpMatches(res, r'^(\d+\.\d+\.\d)') diff --git a/tests/acceptance/vnc.py b/tests/acceptance/vnc.py new file mode 100644 index 0000000..b1ef9d7 --- /dev/null +++ b/tests/acceptance/vnc.py @@ -0,0 +1,60 @@ +# Simple functional tests for VNC functionality +# +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +from avocado_qemu import Test + + +class Vnc(Test): + """ + :avocado: enable + :avocado: tags=vnc,quick + """ + def test_no_vnc(self): + self.vm.add_args('-nodefaults', '-S') + self.vm.launch() + self.assertFalse(self.vm.qmp('query-vnc')['return']['enabled']) + + def test_no_vnc_change_password(self): + self.vm.add_args('-nodefaults', '-S') + self.vm.launch() + self.assertFalse(self.vm.qmp('query-vnc')['return']['enabled']) + set_password_response = self.vm.qmp('change', + device='vnc', + target='password', + arg='new_password') + self.assertIn('error', set_password_response) + self.assertEqual(set_password_response['error']['class'], + 'GenericError') + self.assertEqual(set_password_response['error']['desc'], + 'Could not set password') + + def test_vnc_change_password_requires_a_password(self): + self.vm.add_args('-nodefaults', '-S', '-vnc', ':0') + self.vm.launch() + self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) + set_password_response = self.vm.qmp('change', + device='vnc', + target='password', + arg='new_password') + self.assertIn('error', set_password_response) + self.assertEqual(set_password_response['error']['class'], + 'GenericError') + self.assertEqual(set_password_response['error']['desc'], + 'Could not set password') + + def test_vnc_change_password(self): + self.vm.add_args('-nodefaults', '-S', '-vnc', ':0,password') + self.vm.launch() + self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) + set_password_response = self.vm.qmp('change', + device='vnc', + target='password', + arg='new_password') + self.assertEqual(set_password_response['return'], {}) diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151 new file mode 100755 index 0000000..fe53b9f --- /dev/null +++ b/tests/qemu-iotests/151 @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# +# Tests for active mirroring +# +# Copyright (C) 2018 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests +from iotests import qemu_img + +source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt) +target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + +class TestActiveMirror(iotests.QMPTestCase): + image_len = 128 * 1024 * 1024 # MB + potential_writes_in_flight = True + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, source_img, '128M') + qemu_img('create', '-f', iotests.imgfmt, target_img, '128M') + + blk_source = {'id': 'source', + 'if': 'none', + 'node-name': 'source-node', + 'driver': iotests.imgfmt, + 'file': {'driver': 'file', + 'filename': source_img}} + + blk_target = {'node-name': 'target-node', + 'driver': iotests.imgfmt, + 'file': {'driver': 'file', + 'filename': target_img}} + + self.vm = iotests.VM() + self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source)) + self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target)) + self.vm.add_device('virtio-blk,drive=source') + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + + if not self.potential_writes_in_flight: + self.assertTrue(iotests.compare_images(source_img, target_img), + 'mirror target does not match source') + + os.remove(source_img) + os.remove(target_img) + + def doActiveIO(self, sync_source_and_target): + # Fill the source image + self.vm.hmp_qemu_io('source', + 'write -P 1 0 %i' % self.image_len); + + # Start some background requests + for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset) + for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) + + # Start the block job + result = self.vm.qmp('blockdev-mirror', + job_id='mirror', + filter_node_name='mirror-node', + device='source-node', + target='target-node', + sync='full', + copy_mode='write-blocking') + self.assert_qmp(result, 'return', {}) + + # Start some more requests + for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset) + for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) + + # Wait for the READY event + self.wait_ready(drive='mirror') + + # Now start some final requests; all of these (which land on + # the source) should be settled using the active mechanism. + # The mirror code itself asserts that the source BDS's dirty + # bitmap will stay clean between READY and COMPLETED. + for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset) + for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024): + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) + + if sync_source_and_target: + # If source and target should be in sync after the mirror, + # we have to flush before completion + self.vm.hmp_qemu_io('source', 'aio_flush') + self.potential_writes_in_flight = False + + self.complete_and_wait(drive='mirror', wait_ready=False) + + def testActiveIO(self): + self.doActiveIO(False) + + def testActiveIOFlushed(self): + self.doActiveIO(True) + + + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'raw']) diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out new file mode 100644 index 0000000..fbc63e6 --- /dev/null +++ b/tests/qemu-iotests/151.out @@ -0,0 +1,5 @@ +.. +---------------------------------------------------------------------- +Ran 2 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 937a3d0..eea7581 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -157,6 +157,7 @@ 148 rw auto quick 149 rw auto sudo 150 rw auto quick +151 rw auto 152 rw auto quick 153 rw auto quick 154 rw auto backing quick diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index a11c4cf..291a050 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -27,15 +27,23 @@ #include "block/blockjob_int.h" #include "sysemu/block-backend.h" #include "qapi/error.h" +#include "iothread.h" + +static QemuEvent done_event; typedef struct BDRVTestState { int drain_count; + AioContext *bh_indirection_ctx; + bool sleep_in_drain_begin; } BDRVTestState; static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count++; + if (s->sleep_in_drain_begin) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + } } static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) @@ -50,19 +58,48 @@ static void bdrv_test_close(BlockDriverState *bs) g_assert_cmpint(s->drain_count, >, 0); } +static void co_reenter_bh(void *opaque) +{ + aio_co_wake(opaque); +} + static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVTestState *s = bs->opaque; + /* We want this request to stay until the polling loop in drain waits for * it to complete. We need to sleep a while as bdrv_drain_invoke() comes * first and polls its result, too, but it shouldn't accidentally complete * this request yet. */ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + if (s->bh_indirection_ctx) { + aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh, + qemu_coroutine_self()); + qemu_coroutine_yield(); + } + return 0; } +static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* bdrv_format_default_perms() accepts only these two, so disguise + * detach_by_driver_cb_role as one of them. */ + if (role != &child_file && role != &child_backing) { + role = &child_file; + } + + bdrv_format_default_perms(bs, c, role, reopen_queue, perm, shared, + nperm, nshared); +} + static BlockDriver bdrv_test = { .format_name = "test", .instance_size = sizeof(BDRVTestState), @@ -73,7 +110,7 @@ static BlockDriver bdrv_test = { .bdrv_co_drain_begin = bdrv_test_co_drain_begin, .bdrv_co_drain_end = bdrv_test_co_drain_end, - .bdrv_child_perm = bdrv_format_default_perms, + .bdrv_child_perm = bdrv_test_child_perm, }; static void aio_ret_cb(void *opaque, int ret) @@ -216,6 +253,11 @@ static void test_drv_cb_drain_subtree(void) test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); } +static void test_drv_cb_co_drain_all(void) +{ + call_in_coroutine(test_drv_cb_drain_all); +} + static void test_drv_cb_co_drain(void) { call_in_coroutine(test_drv_cb_drain); @@ -259,8 +301,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) static void test_quiesce_drain_all(void) { - // XXX drain_all doesn't quiesce - //test_quiesce_common(BDRV_DRAIN_ALL, true); + test_quiesce_common(BDRV_DRAIN_ALL, true); } static void test_quiesce_drain(void) @@ -273,6 +314,11 @@ static void test_quiesce_drain_subtree(void) test_quiesce_common(BDRV_SUBTREE_DRAIN, true); } +static void test_quiesce_co_drain_all(void) +{ + call_in_coroutine(test_quiesce_drain_all); +} + static void test_quiesce_co_drain(void) { call_in_coroutine(test_quiesce_drain); @@ -302,12 +348,7 @@ static void test_nested(void) for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { - /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ - int bs_quiesce = (outer != BDRV_DRAIN_ALL) + - (inner != BDRV_DRAIN_ALL); - int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + - (inner == BDRV_SUBTREE_DRAIN); - int backing_cb_cnt = (outer != BDRV_DRAIN) + + int backing_quiesce = (outer != BDRV_DRAIN) + (inner != BDRV_DRAIN); g_assert_cmpint(bs->quiesce_counter, ==, 0); @@ -318,10 +359,10 @@ static void test_nested(void) do_drain_begin(outer, bs); do_drain_begin(inner, bs); - g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); + g_assert_cmpint(bs->quiesce_counter, ==, 2); g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); g_assert_cmpint(s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); + g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); do_drain_end(inner, bs); do_drain_end(outer, bs); @@ -411,7 +452,7 @@ static void test_multiparent(void) blk_unref(blk_b); } -static void test_graph_change(void) +static void test_graph_change_drain_subtree(void) { BlockBackend *blk_a, *blk_b; BlockDriverState *bs_a, *bs_b, *backing; @@ -490,6 +531,221 @@ static void test_graph_change(void) blk_unref(blk_b); } +static void test_graph_change_drain_all(void) +{ + BlockBackend *blk_a, *blk_b; + BlockDriverState *bs_a, *bs_b; + BDRVTestState *a_s, *b_s; + + /* Create node A with a BlockBackend */ + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, + &error_abort); + a_s = bs_a->opaque; + blk_insert_bs(blk_a, bs_a, &error_abort); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); + g_assert_cmpint(a_s->drain_count, ==, 0); + + /* Call bdrv_drain_all_begin() */ + bdrv_drain_all_begin(); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); + g_assert_cmpint(a_s->drain_count, ==, 1); + + /* Create node B with a BlockBackend */ + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, + &error_abort); + b_s = bs_b->opaque; + blk_insert_bs(blk_b, bs_b, &error_abort); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); + g_assert_cmpint(a_s->drain_count, ==, 1); + g_assert_cmpint(b_s->drain_count, ==, 1); + + /* Unref and finally delete node A */ + blk_unref(blk_a); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); + g_assert_cmpint(a_s->drain_count, ==, 1); + g_assert_cmpint(b_s->drain_count, ==, 1); + + bdrv_unref(bs_a); + + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); + g_assert_cmpint(b_s->drain_count, ==, 1); + + /* End the drained section */ + bdrv_drain_all_end(); + + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); + g_assert_cmpint(b_s->drain_count, ==, 0); + + bdrv_unref(bs_b); + blk_unref(blk_b); +} + +struct test_iothread_data { + BlockDriverState *bs; + enum drain_type drain_type; + int *aio_ret; +}; + +static void test_iothread_drain_entry(void *opaque) +{ + struct test_iothread_data *data = opaque; + + aio_context_acquire(bdrv_get_aio_context(data->bs)); + do_drain_begin(data->drain_type, data->bs); + g_assert_cmpint(*data->aio_ret, ==, 0); + do_drain_end(data->drain_type, data->bs); + aio_context_release(bdrv_get_aio_context(data->bs)); + + qemu_event_set(&done_event); +} + +static void test_iothread_aio_cb(void *opaque, int ret) +{ + int *aio_ret = opaque; + *aio_ret = ret; + qemu_event_set(&done_event); +} + +/* + * Starts an AIO request on a BDS that runs in the AioContext of iothread 1. + * The request involves a BH on iothread 2 before it can complete. + * + * @drain_thread = 0 means that do_drain_begin/end are called from the main + * thread, @drain_thread = 1 means that they are called from iothread 1. Drain + * for this BDS cannot be called from iothread 2 because only the main thread + * may do cross-AioContext polling. + */ +static void test_iothread_common(enum drain_type drain_type, int drain_thread) +{ + BlockBackend *blk; + BlockDriverState *bs; + BDRVTestState *s; + BlockAIOCB *acb; + int aio_ret; + struct test_iothread_data data; + + IOThread *a = iothread_new(); + IOThread *b = iothread_new(); + AioContext *ctx_a = iothread_get_aio_context(a); + AioContext *ctx_b = iothread_get_aio_context(b); + + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = NULL, + .iov_len = 0, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + /* bdrv_drain_all() may only be called from the main loop thread */ + if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) { + goto out; + } + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + s = bs->opaque; + blk_insert_bs(blk, bs, &error_abort); + + blk_set_aio_context(blk, ctx_a); + aio_context_acquire(ctx_a); + + s->bh_indirection_ctx = ctx_b; + + aio_ret = -EINPROGRESS; + if (drain_thread == 0) { + acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret); + } else { + acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); + } + g_assert(acb != NULL); + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); + + aio_context_release(ctx_a); + + data = (struct test_iothread_data) { + .bs = bs, + .drain_type = drain_type, + .aio_ret = &aio_ret, + }; + + switch (drain_thread) { + case 0: + if (drain_type != BDRV_DRAIN_ALL) { + aio_context_acquire(ctx_a); + } + + /* The request is running on the IOThread a. Draining its block device + * will make sure that it has completed as far as the BDS is concerned, + * but the drain in this thread can continue immediately after + * bdrv_dec_in_flight() and aio_ret might be assigned only slightly + * later. */ + qemu_event_reset(&done_event); + do_drain_begin(drain_type, bs); + g_assert_cmpint(bs->in_flight, ==, 0); + + if (drain_type != BDRV_DRAIN_ALL) { + aio_context_release(ctx_a); + } + qemu_event_wait(&done_event); + if (drain_type != BDRV_DRAIN_ALL) { + aio_context_acquire(ctx_a); + } + + g_assert_cmpint(aio_ret, ==, 0); + do_drain_end(drain_type, bs); + + if (drain_type != BDRV_DRAIN_ALL) { + aio_context_release(ctx_a); + } + break; + case 1: + qemu_event_reset(&done_event); + aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); + qemu_event_wait(&done_event); + break; + default: + g_assert_not_reached(); + } + + aio_context_acquire(ctx_a); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx_a); + + bdrv_unref(bs); + blk_unref(blk); + +out: + iothread_join(a); + iothread_join(b); +} + +static void test_iothread_drain_all(void) +{ + test_iothread_common(BDRV_DRAIN_ALL, 0); + test_iothread_common(BDRV_DRAIN_ALL, 1); +} + +static void test_iothread_drain(void) +{ + test_iothread_common(BDRV_DRAIN, 0); + test_iothread_common(BDRV_DRAIN, 1); +} + +static void test_iothread_drain_subtree(void) +{ + test_iothread_common(BDRV_SUBTREE_DRAIN, 0); + test_iothread_common(BDRV_SUBTREE_DRAIN, 1); +} + typedef struct TestBlockJob { BlockJob common; @@ -507,7 +763,11 @@ static void coroutine_fn test_job_start(void *opaque) job_transition_to_ready(&s->common.job); while (!s->should_complete) { - job_sleep_ns(&s->common.job, 100000); + /* Avoid block_job_sleep_ns() because it marks the job as !busy. We + * want to emulate some actual activity (probably some I/O) here so + * that drain has to wait for this acitivity to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + job_pause_point(&s->common.job); } job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); @@ -554,7 +814,7 @@ static void test_blockjob_common(enum drain_type drain_type) g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ do_drain_begin(drain_type, src); @@ -564,15 +824,14 @@ static void test_blockjob_common(enum drain_type drain_type) } else { g_assert_cmpint(job->job.pause_count, ==, 1); } - /* XXX We don't wait until the job is actually paused. Is this okay? */ - /* g_assert_true(job->job.paused); */ + g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ do_drain_end(drain_type, src); g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ do_drain_begin(drain_type, target); @@ -582,15 +841,14 @@ static void test_blockjob_common(enum drain_type drain_type) } else { g_assert_cmpint(job->job.pause_count, ==, 1); } - /* XXX We don't wait until the job is actually paused. Is this okay? */ - /* g_assert_true(job->job.paused); */ + g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ do_drain_end(drain_type, target); g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ ret = job_complete_sync(&job->job, &error_abort); g_assert_cmpint(ret, ==, 0); @@ -616,19 +874,399 @@ static void test_blockjob_drain_subtree(void) test_blockjob_common(BDRV_SUBTREE_DRAIN); } + +typedef struct BDRVTestTopState { + BdrvChild *wait_child; +} BDRVTestTopState; + +static void bdrv_test_top_close(BlockDriverState *bs) +{ + BdrvChild *c, *next_c; + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { + bdrv_unref_child(bs, c); + } +} + +static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVTestTopState *tts = bs->opaque; + return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags); +} + +static BlockDriver bdrv_test_top_driver = { + .format_name = "test_top_driver", + .instance_size = sizeof(BDRVTestTopState), + + .bdrv_close = bdrv_test_top_close, + .bdrv_co_preadv = bdrv_test_top_co_preadv, + + .bdrv_child_perm = bdrv_format_default_perms, +}; + +typedef struct TestCoDeleteByDrainData { + BlockBackend *blk; + bool detach_instead_of_delete; + bool done; +} TestCoDeleteByDrainData; + +static void coroutine_fn test_co_delete_by_drain(void *opaque) +{ + TestCoDeleteByDrainData *dbdd = opaque; + BlockBackend *blk = dbdd->blk; + BlockDriverState *bs = blk_bs(blk); + BDRVTestTopState *tts = bs->opaque; + void *buffer = g_malloc(65536); + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = buffer, + .iov_len = 65536, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + + /* Pretend some internal write operation from parent to child. + * Important: We have to read from the child, not from the parent! + * Draining works by first propagating it all up the tree to the + * root and then waiting for drainage from root to the leaves + * (protocol nodes). If we have a request waiting on the root, + * everything will be drained before we go back down the tree, but + * we do not want that. We want to be in the middle of draining + * when this following requests returns. */ + bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0); + + g_assert_cmpint(bs->refcnt, ==, 1); + + if (!dbdd->detach_instead_of_delete) { + blk_unref(blk); + } else { + BdrvChild *c, *next_c; + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { + bdrv_unref_child(bs, c); + } + } + + dbdd->done = true; +} + +/** + * Test what happens when some BDS has some children, you drain one of + * them and this results in the BDS being deleted. + * + * If @detach_instead_of_delete is set, the BDS is not going to be + * deleted but will only detach all of its children. + */ +static void do_test_delete_by_drain(bool detach_instead_of_delete, + enum drain_type drain_type) +{ + BlockBackend *blk; + BlockDriverState *bs, *child_bs, *null_bs; + BDRVTestTopState *tts; + TestCoDeleteByDrainData dbdd; + Coroutine *co; + + bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR, + &error_abort); + bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; + tts = bs->opaque; + + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); + bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); + + /* This child will be the one to pass to requests through to, and + * it will stall until a drain occurs */ + child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR, + &error_abort); + child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; + /* Takes our reference to child_bs */ + tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_file, + &error_abort); + + /* This child is just there to be deleted + * (for detach_instead_of_delete == true) */ + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); + bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, bs, &error_abort); + + /* Referenced by blk now */ + bdrv_unref(bs); + + g_assert_cmpint(bs->refcnt, ==, 1); + g_assert_cmpint(child_bs->refcnt, ==, 1); + g_assert_cmpint(null_bs->refcnt, ==, 1); + + + dbdd = (TestCoDeleteByDrainData){ + .blk = blk, + .detach_instead_of_delete = detach_instead_of_delete, + .done = false, + }; + co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd); + qemu_coroutine_enter(co); + + /* Drain the child while the read operation is still pending. + * This should result in the operation finishing and + * test_co_delete_by_drain() resuming. Thus, @bs will be deleted + * and the coroutine will exit while this drain operation is still + * in progress. */ + switch (drain_type) { + case BDRV_DRAIN: + bdrv_ref(child_bs); + bdrv_drain(child_bs); + bdrv_unref(child_bs); + break; + case BDRV_SUBTREE_DRAIN: + /* Would have to ref/unref bs here for !detach_instead_of_delete, but + * then the whole test becomes pointless because the graph changes + * don't occur during the drain any more. */ + assert(detach_instead_of_delete); + bdrv_subtree_drained_begin(bs); + bdrv_subtree_drained_end(bs); + break; + case BDRV_DRAIN_ALL: + bdrv_drain_all_begin(); + bdrv_drain_all_end(); + break; + default: + g_assert_not_reached(); + } + + while (!dbdd.done) { + aio_poll(qemu_get_aio_context(), true); + } + + if (detach_instead_of_delete) { + /* Here, the reference has not passed over to the coroutine, + * so we have to delete the BB ourselves */ + blk_unref(blk); + } +} + +static void test_delete_by_drain(void) +{ + do_test_delete_by_drain(false, BDRV_DRAIN); +} + +static void test_detach_by_drain_all(void) +{ + do_test_delete_by_drain(true, BDRV_DRAIN_ALL); +} + +static void test_detach_by_drain(void) +{ + do_test_delete_by_drain(true, BDRV_DRAIN); +} + +static void test_detach_by_drain_subtree(void) +{ + do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); +} + + +struct detach_by_parent_data { + BlockDriverState *parent_b; + BdrvChild *child_b; + BlockDriverState *c; + BdrvChild *child_c; + bool by_parent_cb; +}; +static struct detach_by_parent_data detach_by_parent_data; + +static void detach_indirect_bh(void *opaque) +{ + struct detach_by_parent_data *data = opaque; + + bdrv_unref_child(data->parent_b, data->child_b); + + bdrv_ref(data->c); + data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", + &child_file, &error_abort); +} + +static void detach_by_parent_aio_cb(void *opaque, int ret) +{ + struct detach_by_parent_data *data = &detach_by_parent_data; + + g_assert_cmpint(ret, ==, 0); + if (data->by_parent_cb) { + detach_indirect_bh(data); + } +} + +static void detach_by_driver_cb_drained_begin(BdrvChild *child) +{ + aio_bh_schedule_oneshot(qemu_get_current_aio_context(), + detach_indirect_bh, &detach_by_parent_data); + child_file.drained_begin(child); +} + +static BdrvChildRole detach_by_driver_cb_role; + +/* + * Initial graph: + * + * PA PB + * \ / \ + * A B C + * + * by_parent_cb == true: Test that parent callbacks don't poll + * + * PA has a pending write request whose callback changes the child nodes of + * PB: It removes B and adds C instead. The subtree of PB is drained, which + * will indirectly drain the write request, too. + * + * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll + * + * PA's BdrvChildRole has a .drained_begin callback that schedules a BH + * that does the same graph change. If bdrv_drain_invoke() calls it, the + * state is messed up, but if it is only polled in the single + * BDRV_POLL_WHILE() at the end of the drain, this should work fine. + */ +static void test_detach_indirect(bool by_parent_cb) +{ + BlockBackend *blk; + BlockDriverState *parent_a, *parent_b, *a, *b, *c; + BdrvChild *child_a, *child_b; + BlockAIOCB *acb; + + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = NULL, + .iov_len = 0, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + if (!by_parent_cb) { + detach_by_driver_cb_role = child_file; + detach_by_driver_cb_role.drained_begin = + detach_by_driver_cb_drained_begin; + } + + /* Create all involved nodes */ + parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR, + &error_abort); + parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0, + &error_abort); + + a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort); + b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort); + c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); + + /* blk is a BB for parent-a */ + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, parent_a, &error_abort); + bdrv_unref(parent_a); + + /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver + * callback must not return immediately. */ + if (!by_parent_cb) { + BDRVTestState *s = parent_a->opaque; + s->sleep_in_drain_begin = true; + } + + /* Set child relationships */ + bdrv_ref(b); + bdrv_ref(a); + child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_file, &error_abort); + child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, &error_abort); + + bdrv_ref(a); + bdrv_attach_child(parent_a, a, "PA-A", + by_parent_cb ? &child_file : &detach_by_driver_cb_role, + &error_abort); + + g_assert_cmpint(parent_a->refcnt, ==, 1); + g_assert_cmpint(parent_b->refcnt, ==, 1); + g_assert_cmpint(a->refcnt, ==, 3); + g_assert_cmpint(b->refcnt, ==, 2); + g_assert_cmpint(c->refcnt, ==, 1); + + g_assert(QLIST_FIRST(&parent_b->children) == child_a); + g_assert(QLIST_NEXT(child_a, next) == child_b); + g_assert(QLIST_NEXT(child_b, next) == NULL); + + /* Start the evil write request */ + detach_by_parent_data = (struct detach_by_parent_data) { + .parent_b = parent_b, + .child_b = child_b, + .c = c, + .by_parent_cb = by_parent_cb, + }; + acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL); + g_assert(acb != NULL); + + /* Drain and check the expected result */ + bdrv_subtree_drained_begin(parent_b); + + g_assert(detach_by_parent_data.child_c != NULL); + + g_assert_cmpint(parent_a->refcnt, ==, 1); + g_assert_cmpint(parent_b->refcnt, ==, 1); + g_assert_cmpint(a->refcnt, ==, 3); + g_assert_cmpint(b->refcnt, ==, 1); + g_assert_cmpint(c->refcnt, ==, 2); + + g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c); + g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a); + g_assert(QLIST_NEXT(child_a, next) == NULL); + + g_assert_cmpint(parent_a->quiesce_counter, ==, 1); + g_assert_cmpint(parent_b->quiesce_counter, ==, 1); + g_assert_cmpint(a->quiesce_counter, ==, 1); + g_assert_cmpint(b->quiesce_counter, ==, 0); + g_assert_cmpint(c->quiesce_counter, ==, 1); + + bdrv_subtree_drained_end(parent_b); + + bdrv_unref(parent_b); + blk_unref(blk); + + /* XXX Once bdrv_close() unref's children instead of just detaching them, + * this won't be necessary any more. */ + bdrv_unref(a); + bdrv_unref(a); + bdrv_unref(c); + + g_assert_cmpint(a->refcnt, ==, 1); + g_assert_cmpint(b->refcnt, ==, 1); + g_assert_cmpint(c->refcnt, ==, 1); + bdrv_unref(a); + bdrv_unref(b); + bdrv_unref(c); +} + +static void test_detach_by_parent_cb(void) +{ + test_detach_indirect(true); +} + +static void test_detach_by_driver_cb(void) +{ + test_detach_indirect(false); +} + int main(int argc, char **argv) { + int ret; + bdrv_init(); qemu_init_main_loop(&error_abort); g_test_init(&argc, &argv, NULL); + qemu_event_init(&done_event, false); g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", test_drv_cb_drain_subtree); - // XXX bdrv_drain_all() doesn't work in coroutine context + g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", + test_drv_cb_co_drain_all); g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", test_drv_cb_co_drain_subtree); @@ -639,19 +1277,38 @@ int main(int argc, char **argv) g_test_add_func("/bdrv-drain/quiesce/drain_subtree", test_quiesce_drain_subtree); - // XXX bdrv_drain_all() doesn't work in coroutine context + g_test_add_func("/bdrv-drain/quiesce/co/drain_all", + test_quiesce_co_drain_all); g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", test_quiesce_co_drain_subtree); g_test_add_func("/bdrv-drain/nested", test_nested); g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - g_test_add_func("/bdrv-drain/graph-change", test_graph_change); + + g_test_add_func("/bdrv-drain/graph-change/drain_subtree", + test_graph_change_drain_subtree); + g_test_add_func("/bdrv-drain/graph-change/drain_all", + test_graph_change_drain_all); + + g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); + g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); + g_test_add_func("/bdrv-drain/iothread/drain_subtree", + test_iothread_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); g_test_add_func("/bdrv-drain/blockjob/drain_subtree", test_blockjob_drain_subtree); - return g_test_run(); + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); + g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); + g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); + + ret = g_test_run(); + qemu_event_destroy(&done_event); + return ret; } diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index f29631f..5e67ac1 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -30,6 +30,18 @@ typedef struct TestHBitmapData { } TestHBitmapData; +static int64_t check_hbitmap_iter_next(HBitmapIter *hbi) +{ + int next0, next1; + + next0 = hbitmap_iter_next(hbi, false); + next1 = hbitmap_iter_next(hbi, true); + + g_assert_cmpint(next0, ==, next1); + + return next0; +} + /* Check that the HBitmap and the shadow bitmap contain the same data, * ignoring the same "first" bits. */ @@ -46,7 +58,7 @@ static void hbitmap_test_check(TestHBitmapData *data, i = first; for (;;) { - next = hbitmap_iter_next(&hbi); + next = check_hbitmap_iter_next(&hbi); if (next < 0) { next = data->size; } @@ -435,25 +447,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data, /* Note that hbitmap_test_check has to be invoked manually in this test. */ hbitmap_test_init(data, 131072 << 7, 7); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); hbitmap_test_set(data, (131072 << 7) - 8, 8); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7); + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); } static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff) @@ -893,7 +905,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, for (i = 0; i < num_positions; i++) { hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); hbitmap_iter_init(&iter, data->hb, 0); - next = hbitmap_iter_next(&iter); + next = check_hbitmap_iter_next(&iter); if (i == num_positions - 1) { g_assert_cmpint(next, ==, -1); } else { @@ -919,10 +931,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data, hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1); - hbitmap_iter_next(&hbi); + check_hbitmap_iter_next(&hbi); hbitmap_reset_all(data->hb); - hbitmap_iter_next(&hbi); + check_hbitmap_iter_next(&hbi); } static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start) diff --git a/util/hbitmap.c b/util/hbitmap.c index 58a2c93..bcd3040 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -141,7 +141,7 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) return cur; } -int64_t hbitmap_iter_next(HBitmapIter *hbi) +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance) { unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] & hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos]; @@ -154,8 +154,12 @@ int64_t hbitmap_iter_next(HBitmapIter *hbi) } } - /* The next call will resume work from the next bit. */ - hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + if (advance) { + /* The next call will resume work from the next bit. */ + hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + } else { + hbi->cur[HBITMAP_LEVELS - 1] = cur; + } item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur); return item << hbi->granularity; diff --git a/util/osdep.c b/util/osdep.c index a73de0e..ea51d50 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -302,7 +302,8 @@ int qemu_open(const char *name, int flags, ...) } fd = monitor_fdset_get_fd(fdset_id, flags); - if (fd == -1) { + if (fd < 0) { + errno = -fd; return -1; } @@ -2978,6 +2978,7 @@ int main(int argc, char **argv, char **envp) runstate_init(); postcopy_infrastructure_init(); + monitor_init_globals(); if (qcrypto_init(&err) < 0) { error_reportf_err(err, "cannot initialize crypto: "); @@ -4412,12 +4413,6 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - /* - * Note: qtest_enabled() (which is used in monitor_qapi_event_init()) - * depends on configure_accelerator() above. - */ - monitor_init_globals(); - if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, NULL)) { exit(1); |