aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.shippable.yml2
-rw-r--r--block.c583
-rw-r--r--block/backup.c22
-rw-r--r--block/blkdebug.c2
-rwxr-xr-xblock/blkreplay.c1
-rw-r--r--block/blkverify.c1
-rw-r--r--block/block-backend.c116
-rw-r--r--block/bochs.c1
-rw-r--r--block/cloop.c1
-rw-r--r--block/commit.c176
-rw-r--r--block/crypto.c1
-rw-r--r--block/dmg.c1
-rw-r--r--block/io.c41
-rw-r--r--block/mirror.c237
-rw-r--r--block/parallels.c4
-rw-r--r--block/qcow.c4
-rw-r--r--block/qcow2.c19
-rw-r--r--block/qed.c4
-rw-r--r--block/quorum.c11
-rw-r--r--block/raw-format.c1
-rw-r--r--block/replication.c3
-rw-r--r--block/sheepdog.c2
-rw-r--r--block/stream.c47
-rw-r--r--block/vdi.c4
-rw-r--r--block/vhdx.c4
-rw-r--r--block/vmdk.c7
-rw-r--r--block/vpc.c4
-rw-r--r--block/vvfat.c24
-rw-r--r--blockdev.c74
-rw-r--r--blockjob.c62
-rw-r--r--default-configs/arm-softmmu.mak2
-rw-r--r--docs/mach-virt-graphical.cfg281
-rw-r--r--docs/mach-virt-serial.cfg243
-rw-r--r--docs/migration.txt71
-rw-r--r--docs/q35-chipset.cfg152
-rw-r--r--docs/q35-emulated.cfg288
-rw-r--r--docs/q35-virtio-graphical.cfg248
-rw-r--r--docs/q35-virtio-serial.cfg193
-rw-r--r--exec.c83
-rw-r--r--hmp.c33
-rw-r--r--hw/9pfs/9p-local.c1023
-rw-r--r--hw/9pfs/9p-local.h20
-rw-r--r--hw/9pfs/9p-posix-acl.c44
-rw-r--r--hw/9pfs/9p-util.c69
-rw-r--r--hw/9pfs/9p-util.h54
-rw-r--r--hw/9pfs/9p-xattr-user.c24
-rw-r--r--hw/9pfs/9p-xattr.c166
-rw-r--r--hw/9pfs/9p-xattr.h87
-rw-r--r--hw/9pfs/Makefile.objs2
-rw-r--r--hw/acpi/pcihp.c11
-rw-r--r--hw/acpi/piix4.c2
-rw-r--r--hw/arm/armv7m.c379
-rw-r--r--hw/arm/bcm2835_peripherals.c43
-rw-r--r--hw/arm/netduino2.c7
-rw-r--r--hw/arm/stm32f205_soc.c28
-rw-r--r--hw/block/block.c24
-rw-r--r--hw/block/fdc.c28
-rw-r--r--hw/block/m25p80.c8
-rw-r--r--hw/block/nand.c7
-rw-r--r--hw/block/nvme.c8
-rw-r--r--hw/block/onenand.c7
-rw-r--r--hw/block/pflash_cfi01.c18
-rw-r--r--hw/block/pflash_cfi02.c19
-rw-r--r--hw/block/virtio-blk.c8
-rw-r--r--hw/core/bus.c2
-rw-r--r--hw/core/loader.c17
-rw-r--r--hw/core/ptimer.c8
-rw-r--r--hw/core/qdev-properties-system.c9
-rw-r--r--hw/core/qdev.c21
-rw-r--r--hw/gpio/Makefile.objs1
-rw-r--r--hw/gpio/bcm2835_gpio.c353
-rw-r--r--hw/i386/acpi-build.c4
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/ide/qdev.c9
-rw-r--r--hw/intc/Makefile.objs2
-rw-r--r--hw/intc/arm_gicv3_common.c38
-rw-r--r--hw/intc/arm_gicv3_cpuif.c8
-rw-r--r--hw/intc/arm_gicv3_kvm.c629
-rw-r--r--hw/intc/armv7m_nvic.c214
-rw-r--r--hw/intc/gicv3_internal.h3
-rw-r--r--hw/intc/xics.c461
-rw-r--r--hw/intc/xics_kvm.c184
-rw-r--r--hw/intc/xics_spapr.c128
-rw-r--r--hw/nvram/spapr_nvram.c8
-rw-r--r--hw/pci/pci.c28
-rw-r--r--hw/ppc/spapr.c196
-rw-r--r--hw/ppc/spapr_cpu_core.c24
-rw-r--r--hw/ppc/spapr_events.c10
-rw-r--r--hw/ppc/spapr_hcall.c89
-rw-r--r--hw/ppc/spapr_pci.c312
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/s390x/ipl.c90
-rw-r--r--hw/s390x/ipl.h5
-rw-r--r--hw/s390x/s390-virtio-ccw.c3
-rw-r--r--hw/s390x/s390-virtio.c2
-rw-r--r--hw/s390x/s390-virtio.h1
-rw-r--r--hw/scsi/scsi-disk.c12
-rw-r--r--hw/sd/core.c27
-rw-r--r--hw/sd/sd.c8
-rw-r--r--hw/timer/Makefile.objs1
-rw-r--r--hw/timer/armv7m_systick.c240
-rw-r--r--hw/timer/trace-events6
-rw-r--r--hw/usb/bus.c19
-rw-r--r--hw/usb/dev-storage.c22
-rw-r--r--hw/usb/dev-uas.c2
-rw-r--r--include/block/block.h46
-rw-r--r--include/block/block_int.h126
-rw-r--r--include/block/blockjob.h14
-rw-r--r--include/block/blockjob_int.h4
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/glib-compat.h21
-rw-r--r--include/hw/arm/arm.h12
-rw-r--r--include/hw/arm/armv7m.h63
-rw-r--r--include/hw/arm/armv7m_nvic.h62
-rw-r--r--include/hw/arm/bcm2835_peripherals.h4
-rw-r--r--include/hw/arm/stm32f205_soc.h4
-rw-r--r--include/hw/block/block.h8
-rw-r--r--include/hw/elf_ops.h13
-rw-r--r--include/hw/gpio/bcm2835_gpio.h39
-rw-r--r--include/hw/intc/arm_gicv3_common.h1
-rw-r--r--include/hw/loader.h13
-rw-r--r--include/hw/pci-host/spapr.h2
-rw-r--r--include/hw/pci/pci.h4
-rw-r--r--include/hw/pci/pci_ids.h112
-rw-r--r--include/hw/ppc/spapr.h5
-rw-r--r--include/hw/ppc/spapr_vio.h2
-rw-r--r--include/hw/ppc/xics.h97
-rw-r--r--include/hw/ptimer.h1
-rw-r--r--include/hw/sd/sd.h11
-rw-r--r--include/hw/timer/armv7m_systick.h34
-rw-r--r--include/migration/migration.h6
-rw-r--r--include/migration/postcopy-ram.h13
-rw-r--r--include/migration/vmstate.h4
-rw-r--r--include/qemu-io.h1
-rw-r--r--include/qemu/timer.h5
-rw-r--r--include/standard-headers/asm-x86/hyperv.h8
-rw-r--r--include/standard-headers/linux/input-event-codes.h2
-rw-r--r--include/standard-headers/linux/pci_regs.h25
-rw-r--r--include/standard-headers/linux/virtio_ids.h1
-rw-r--r--include/sysemu/block-backend.h9
-rw-r--r--linux-headers/asm-arm/kvm.h15
-rw-r--r--linux-headers/asm-arm/unistd-common.h357
-rw-r--r--linux-headers/asm-arm/unistd-eabi.h5
-rw-r--r--linux-headers/asm-arm/unistd-oabi.h17
-rw-r--r--linux-headers/asm-arm/unistd.h419
-rw-r--r--linux-headers/asm-arm64/kvm.h13
-rw-r--r--linux-headers/asm-powerpc/kvm.h27
-rw-r--r--linux-headers/asm-powerpc/unistd.h1
-rw-r--r--linux-headers/asm-x86/kvm_para.h13
-rw-r--r--linux-headers/linux/kvm.h24
-rw-r--r--linux-headers/linux/kvm_para.h2
-rw-r--r--linux-headers/linux/userfaultfd.h67
-rw-r--r--linux-headers/linux/vfio.h10
-rw-r--r--migration/block.c21
-rw-r--r--migration/colo.c49
-rw-r--r--migration/migration.c36
-rw-r--r--migration/postcopy-ram.c142
-rw-r--r--migration/ram.c109
-rw-r--r--migration/savevm.c38
-rw-r--r--migration/trace-events2
-rw-r--r--migration/vmstate.c97
-rw-r--r--monitor.c4
-rw-r--r--nbd/server.c16
-rw-r--r--pc-bios/bios-256k.binbin262144 -> 262144 bytes
-rw-r--r--pc-bios/bios.binbin131072 -> 131072 bytes
-rw-r--r--pc-bios/openbios-ppcbin750840 -> 750840 bytes
-rw-r--r--pc-bios/openbios-sparc32bin382048 -> 382048 bytes
-rw-r--r--pc-bios/openbios-sparc64bin1593408 -> 1593408 bytes
-rw-r--r--pc-bios/s390-ccw.imgbin26392 -> 26456 bytes
-rw-r--r--pc-bios/s390-ccw/bootmap.c8
-rw-r--r--pc-bios/s390-ccw/iplb.h3
-rw-r--r--pc-bios/s390-ccw/main.c20
-rw-r--r--pc-bios/s390-ccw/virtio.c1
-rw-r--r--pc-bios/s390-ccw/virtio.h1
-rw-r--r--pc-bios/vgabios-cirrus.binbin38400 -> 38400 bytes
-rw-r--r--pc-bios/vgabios-qxl.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-stdvga.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-virtio.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-vmware.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios.binbin38400 -> 38400 bytes
-rw-r--r--qapi-schema.json82
-rw-r--r--qapi/block-core.json16
-rw-r--r--qdev-monitor.c9
-rw-r--r--qemu-img-cmds.hx4
-rw-r--r--qemu-img.c334
-rw-r--r--qemu-img.texi16
-rw-r--r--qemu-io-cmds.c28
-rw-r--r--qtest.c1
m---------roms/openbios0
m---------roms/seabios0
-rwxr-xr-xscripts/update-linux-headers.sh13
-rwxr-xr-xscripts/vmstate-static-checker.py5
-rw-r--r--stubs/vmstate.c6
-rw-r--r--target/arm/cpu.h2
-rw-r--r--target/i386/cpu-qom.h8
-rw-r--r--target/i386/cpu.c455
-rw-r--r--target/i386/cpu.h2
-rw-r--r--target/ppc/Makefile.objs5
-rw-r--r--target/ppc/arch_dump.c154
-rw-r--r--target/ppc/cpu.c47
-rw-r--r--target/ppc/cpu.h50
-rw-r--r--target/ppc/int_helper.c34
-rw-r--r--target/ppc/kvm.c128
-rw-r--r--target/ppc/kvm_ppc.h20
-rw-r--r--target/ppc/machine.c5
-rw-r--r--target/ppc/misc_helper.c8
-rw-r--r--target/ppc/mmu-hash32.c14
-rw-r--r--target/ppc/mmu-hash32.h34
-rw-r--r--target/ppc/mmu-hash64.c193
-rw-r--r--target/ppc/mmu-hash64.h65
-rw-r--r--target/ppc/mmu_helper.c51
-rw-r--r--target/ppc/translate.c115
-rw-r--r--target/ppc/translate_init.c39
-rw-r--r--tcg/aarch64/tcg-target.inc.c4
-rw-r--r--tests/bios-tables-test.c2
-rw-r--r--tests/docker/dockerfiles/debian-s390x-cross.docker22
-rw-r--r--tests/e1000-test.c1
-rw-r--r--tests/e1000e-test.c6
-rw-r--r--tests/eepro100-test.c1
-rw-r--r--tests/endianness-test.c3
-rw-r--r--tests/hd-geo-test.c53
-rw-r--r--tests/i440fx-test.c5
-rw-r--r--tests/ide-test.c12
-rw-r--r--tests/ipmi-bt-test.c1
-rw-r--r--tests/ipmi-kcs-test.c1
-rw-r--r--tests/libqos/usb.c6
-rw-r--r--tests/libqos/usb.h1
-rw-r--r--tests/libqos/virtio-pci.c38
-rw-r--r--tests/libqos/virtio-pci.h6
-rw-r--r--tests/libqtest.c10
-rw-r--r--tests/postcopy-test.c2
-rw-r--r--tests/ptimer-test-stubs.c5
-rw-r--r--tests/ptimer-test.c122
-rw-r--r--tests/pvpanic-test.c1
-rw-r--r--tests/q35-test.c3
-rw-r--r--tests/qemu-iotests/049.out14
-rw-r--r--tests/qemu-iotests/051.pc.out6
-rwxr-xr-xtests/qemu-iotests/05511
-rw-r--r--tests/qemu-iotests/085.out2
-rwxr-xr-xtests/qemu-iotests/1412
-rw-r--r--tests/qemu-iotests/141.out4
-rw-r--r--tests/qemu-iotests/172.out53
-rw-r--r--tests/tco-test.c35
-rw-r--r--tests/test-blockjob-txn.c6
-rw-r--r--tests/test-blockjob.c10
-rw-r--r--tests/test-filter-mirror.c2
-rw-r--r--tests/test-filter-redirector.c4
-rw-r--r--tests/test-io-channel-command.c6
-rw-r--r--tests/test-throttle.c7
-rw-r--r--tests/test-vmstate.c122
-rw-r--r--tests/usb-hcd-ehci-test.c19
-rw-r--r--tests/usb-hcd-uhci-test.c1
-rw-r--r--tests/vhost-user-test.c11
-rw-r--r--tests/virtio-9p-test.c2
-rw-r--r--tests/virtio-blk-test.c29
-rw-r--r--tests/virtio-scsi-test.c2
-rw-r--r--util/qemu-option.c2
-rw-r--r--util/qemu-timer.c5
258 files changed, 9724 insertions, 3509 deletions
diff --git a/.shippable.yml b/.shippable.yml
index 1a1fd7a..653bd75 100644
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -5,6 +5,8 @@ env:
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
+ - IMAGE=debian-s390x-cross
+ TARGET_LIST=s390x-softmmu,s390x-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}
diff --git a/block.c b/block.c
index b663204..f293ccb 100644
--- a/block.c
+++ b/block.c
@@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
return 0;
}
+static char *bdrv_child_get_parent_desc(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+ return g_strdup(bdrv_get_device_or_node_name(parent));
+}
+
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_file = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_format = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_fmt_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
};
+static void bdrv_backing_attach(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+ BlockDriverState *backing_hd = c->bs;
+
+ assert(!parent->backing_blocker);
+ error_setg(&parent->backing_blocker,
+ "node is used as backing hd of '%s'",
+ bdrv_get_device_or_node_name(parent));
+
+ parent->open_flags &= ~BDRV_O_NO_BACKING;
+ pstrcpy(parent->backing_file, sizeof(parent->backing_file),
+ backing_hd->filename);
+ pstrcpy(parent->backing_format, sizeof(parent->backing_format),
+ backing_hd->drv ? backing_hd->drv->format_name : "");
+
+ bdrv_op_block_all(backing_hd, parent->backing_blocker);
+ /* Otherwise we won't be able to commit or stream */
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
+ parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+ parent->backing_blocker);
+ /*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ * The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ * Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ * Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+ parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+ parent->backing_blocker);
+}
+
+static void bdrv_backing_detach(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+
+ assert(parent->backing_blocker);
+ bdrv_op_unblock_all(c->bs, parent->backing_blocker);
+ error_free(parent->backing_blocker);
+ parent->backing_blocker = NULL;
+}
+
/*
* Returns the options and flags that bs->backing should get, based on the
* given options and flags for the parent BDS
@@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
*child_flags = flags;
}
-static const BdrvChildRole child_backing = {
+const BdrvChildRole child_backing = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
+ .attach = bdrv_backing_attach,
+ .detach = bdrv_backing_detach,
.inherit_options = bdrv_backing_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+/*
+ * Check whether permissions on this node can be changed in a way that
+ * @cumulative_perms and @cumulative_shared_perms are the new cumulative
+ * permissions of all its parents. This involves checking whether all necessary
+ * permission changes to child nodes can be performed.
+ *
+ * A call to this function must always be followed by a call to bdrv_set_perm()
+ * or bdrv_abort_perm_update().
+ */
+static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+ uint64_t cumulative_shared_perms, Error **errp)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+ int ret;
+
+ /* Write permissions never work with read-only images */
+ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
+ bdrv_is_read_only(bs))
+ {
+ error_setg(errp, "Block node is read-only");
+ return -EPERM;
+ }
+
+ /* Check this node */
+ if (!drv) {
+ return 0;
+ }
+
+ if (drv->bdrv_check_perm) {
+ return drv->bdrv_check_perm(bs, cumulative_perms,
+ cumulative_shared_perms, errp);
+ }
+
+ /* Drivers that never have children can omit .bdrv_child_perm() */
+ if (!drv->bdrv_child_perm) {
+ assert(QLIST_EMPTY(&bs->children));
+ return 0;
+ }
+
+ /* Check all children */
+ QLIST_FOREACH(c, &bs->children, next) {
+ uint64_t cur_perm, cur_shared;
+ drv->bdrv_child_perm(bs, c, c->role,
+ cumulative_perms, cumulative_shared_perms,
+ &cur_perm, &cur_shared);
+ ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Notifies drivers that after a previous bdrv_check_perm() call, the
+ * permission update is not performed and any preparations made for it (e.g.
+ * taken file locks) need to be undone.
+ *
+ * This function recursively notifies all child nodes.
+ */
+static void bdrv_abort_perm_update(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+
+ if (!drv) {
+ return;
+ }
+
+ if (drv->bdrv_abort_perm_update) {
+ drv->bdrv_abort_perm_update(bs);
+ }
+
+ QLIST_FOREACH(c, &bs->children, next) {
+ bdrv_child_abort_perm_update(c);
+ }
+}
+
+static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+ uint64_t cumulative_shared_perms)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+
+ if (!drv) {
+ return;
+ }
+
+ /* Update this node */
+ if (drv->bdrv_set_perm) {
+ drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+ }
+
+ /* Drivers that never have children can omit .bdrv_child_perm() */
+ if (!drv->bdrv_child_perm) {
+ assert(QLIST_EMPTY(&bs->children));
+ return;
+ }
+
+ /* Update all children */
+ QLIST_FOREACH(c, &bs->children, next) {
+ uint64_t cur_perm, cur_shared;
+ drv->bdrv_child_perm(bs, c, c->role,
+ cumulative_perms, cumulative_shared_perms,
+ &cur_perm, &cur_shared);
+ bdrv_child_set_perm(c, cur_perm, cur_shared);
+ }
+}
+
+static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+ uint64_t *shared_perm)
+{
+ BdrvChild *c;
+ uint64_t cumulative_perms = 0;
+ uint64_t cumulative_shared_perms = BLK_PERM_ALL;
+
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ cumulative_perms |= c->perm;
+ cumulative_shared_perms &= c->shared_perm;
+ }
+
+ *perm = cumulative_perms;
+ *shared_perm = cumulative_shared_perms;
+}
+
+static char *bdrv_child_user_desc(BdrvChild *c)
+{
+ if (c->role->get_parent_desc) {
+ return c->role->get_parent_desc(c);
+ }
+
+ return g_strdup("another user");
+}
+
+static char *bdrv_perm_names(uint64_t perm)
+{
+ struct perm_name {
+ uint64_t perm;
+ const char *name;
+ } permissions[] = {
+ { BLK_PERM_CONSISTENT_READ, "consistent read" },
+ { BLK_PERM_WRITE, "write" },
+ { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
+ { BLK_PERM_RESIZE, "resize" },
+ { BLK_PERM_GRAPH_MOD, "change children" },
+ { 0, NULL }
+ };
+
+ char *result = g_strdup("");
+ struct perm_name *p;
+
+ for (p = permissions; p->name; p++) {
+ if (perm & p->perm) {
+ char *old = result;
+ result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
+ g_free(old);
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Checks whether a new reference to @bs can be added if the new user requires
+ * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set,
+ * this old reference is ignored in the calculations; this allows checking
+ * permission updates for an existing reference.
+ *
+ * Needs to be followed by a call to either bdrv_set_perm() or
+ * bdrv_abort_perm_update(). */
+static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
+ uint64_t new_shared_perm,
+ BdrvChild *ignore_child, Error **errp)
+{
+ BdrvChild *c;
+ uint64_t cumulative_perms = new_used_perm;
+ uint64_t cumulative_shared_perms = new_shared_perm;
+
+ /* There is no reason why anyone couldn't tolerate write_unchanged */
+ assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
+
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c == ignore_child) {
+ continue;
+ }
+
+ if ((new_used_perm & c->shared_perm) != new_used_perm) {
+ char *user = bdrv_child_user_desc(c);
+ char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
+ error_setg(errp, "Conflicts with use by %s as '%s', which does not "
+ "allow '%s' on %s",
+ user, c->name, perm_names, bdrv_get_node_name(c->bs));
+ g_free(user);
+ g_free(perm_names);
+ return -EPERM;
+ }
+
+ if ((c->perm & new_shared_perm) != c->perm) {
+ char *user = bdrv_child_user_desc(c);
+ char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
+ error_setg(errp, "Conflicts with use by %s as '%s', which uses "
+ "'%s' on %s",
+ user, c->name, perm_names, bdrv_get_node_name(c->bs));
+ g_free(user);
+ g_free(perm_names);
+ return -EPERM;
+ }
+
+ cumulative_perms |= c->perm;
+ cumulative_shared_perms &= c->shared_perm;
+ }
+
+ return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp);
+}
+
+/* Needs to be followed by a call to either bdrv_child_set_perm() or
+ * bdrv_child_abort_perm_update(). */
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp)
+{
+ return bdrv_check_update_perm(c->bs, perm, shared, c, errp);
+}
+
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
+{
+ uint64_t cumulative_perms, cumulative_shared_perms;
+
+ c->perm = perm;
+ c->shared_perm = shared;
+
+ bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
+ &cumulative_shared_perms);
+ bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
+}
+
+void bdrv_child_abort_perm_update(BdrvChild *c)
+{
+ bdrv_abort_perm_update(c->bs);
+}
+
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp)
+{
+ int ret;
+
+ ret = bdrv_child_check_perm(c, perm, shared, errp);
+ if (ret < 0) {
+ bdrv_child_abort_perm_update(c);
+ return ret;
+ }
+
+ bdrv_child_set_perm(c, perm, shared);
+
+ return 0;
+}
+
+#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+ | BLK_PERM_WRITE \
+ | BLK_PERM_WRITE_UNCHANGED \
+ | BLK_PERM_RESIZE)
+#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
+
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ if (c == NULL) {
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
+ return;
+ }
+
+ *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
+ (c->perm & DEFAULT_PERM_UNCHANGED);
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
+ (c->shared_perm & DEFAULT_PERM_UNCHANGED);
+}
+
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ bool backing = (role == &child_backing);
+ assert(role == &child_backing || role == &child_file);
+
+ if (!backing) {
+ /* Apart from the modifications below, the same permissions are
+ * forwarded and left alone as for filters */
+ bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
+
+ /* Format drivers may touch metadata even if the guest doesn't write */
+ if (!bdrv_is_read_only(bs)) {
+ perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+ }
+
+ /* bs->file always needs to be consistent because of the metadata. We
+ * can never allow other users to resize or write to it. */
+ perm |= BLK_PERM_CONSISTENT_READ;
+ shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ } else {
+ /* We want consistent read from backing files if the parent needs it.
+ * No other operations are performed on backing files. */
+ perm &= BLK_PERM_CONSISTENT_READ;
+
+ /* If the parent can deal with changing data, we're okay with a
+ * writable and resizable backing file. */
+ /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
+ if (shared & BLK_PERM_WRITE) {
+ shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
+ } else {
+ shared = 0;
+ }
+
+ shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
+ BLK_PERM_WRITE_UNCHANGED;
+ }
+
+ *nperm = perm;
+ *nshared = shared;
+}
+
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
+ bool check_new_perm)
{
BlockDriverState *old_bs = child->bs;
+ uint64_t perm, shared_perm;
if (old_bs) {
if (old_bs->quiesce_counter && child->role->drained_end) {
child->role->drained_end(child);
}
+ if (child->role->detach) {
+ child->role->detach(child);
+ }
QLIST_REMOVE(child, next_parent);
+
+ /* Update permissions for old node. This is guaranteed to succeed
+ * because we're just taking a parent away, so we're loosening
+ * restrictions. */
+ bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
+ bdrv_check_perm(old_bs, perm, shared_perm, &error_abort);
+ bdrv_set_perm(old_bs, perm, shared_perm);
}
child->bs = new_bs;
@@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
if (new_bs->quiesce_counter && child->role->drained_begin) {
child->role->drained_begin(child);
}
+
+ bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
+ if (check_new_perm) {
+ bdrv_check_perm(new_bs, perm, shared_perm, &error_abort);
+ }
+ bdrv_set_perm(new_bs, perm, shared_perm);
+
+ if (child->role->attach) {
+ child->role->attach(child);
+ }
}
}
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
- void *opaque)
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp)
{
- BdrvChild *child = g_new(BdrvChild, 1);
+ BdrvChild *child;
+ int ret;
+
+ ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
+ if (ret < 0) {
+ bdrv_abort_perm_update(child_bs);
+ return NULL;
+ }
+
+ child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
- .bs = NULL,
- .name = g_strdup(child_name),
- .role = child_role,
- .opaque = opaque,
+ .bs = NULL,
+ .name = g_strdup(child_name),
+ .role = child_role,
+ .perm = perm,
+ .shared_perm = shared_perm,
+ .opaque = opaque,
};
- bdrv_replace_child(child, child_bs);
+ /* This performs the matching bdrv_set_perm() for the above check. */
+ bdrv_replace_child(child, child_bs, false);
return child;
}
@@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role)
+ const BdrvChildRole *child_role,
+ Error **errp)
{
- BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
- parent_bs);
+ BdrvChild *child;
+ uint64_t perm, shared_perm;
+
+ bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+
+ assert(parent_bs->drv);
+ parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
+ perm, shared_perm, &perm, &shared_perm);
+
+ child = bdrv_root_attach_child(child_bs, child_name, child_role,
+ perm, shared_perm, parent_bs, errp);
+ if (child == NULL) {
+ return NULL;
+ }
+
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
@@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child)
child->next.le_prev = NULL;
}
- bdrv_replace_child(child, NULL);
+ bdrv_replace_child(child, NULL, false);
g_free(child->name);
g_free(child);
@@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
* Sets the backing file link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref().
*/
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
{
if (backing_hd) {
bdrv_ref(backing_hd);
}
if (bs->backing) {
- assert(bs->backing_blocker);
- bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
bdrv_unref_child(bs, bs->backing);
- } else if (backing_hd) {
- error_setg(&bs->backing_blocker,
- "node is used as backing hd of '%s'",
- bdrv_get_device_or_node_name(bs));
}
if (!backing_hd) {
- error_free(bs->backing_blocker);
- bs->backing_blocker = NULL;
bs->backing = NULL;
goto out;
}
- bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
- bs->open_flags &= ~BDRV_O_NO_BACKING;
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
- pstrcpy(bs->backing_format, sizeof(bs->backing_format),
- backing_hd->drv ? backing_hd->drv->format_name : "");
- bdrv_op_block_all(backing_hd, bs->backing_blocker);
- /* Otherwise we won't be able to commit or stream */
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
- bs->backing_blocker);
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
- bs->backing_blocker);
- /*
- * We do backup in 3 ways:
- * 1. drive backup
- * The target bs is new opened, and the source is top BDS
- * 2. blockdev backup
- * Both the source and the target are top BDSes.
- * 3. internal backup(used for block replication)
- * Both the source and the target are backing file
- *
- * In case 1 and 2, neither the source nor the target is the backing file.
- * In case 3, we will block the top BDS, so there is only one block job
- * for the top BDS and its backing chain.
- */
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
- bs->backing_blocker);
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
- bs->backing_blocker);
+ bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
+ errp);
+ if (!bs->backing) {
+ bdrv_unref(backing_hd);
+ }
+
out:
bdrv_refresh_limits(bs, NULL);
}
@@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
- bdrv_set_backing_hd(bs, backing_hd);
+ bdrv_set_backing_hd(bs, backing_hd, &local_err);
bdrv_unref(backing_hd);
+ if (local_err) {
+ ret = -EINVAL;
+ goto free_exit;
+ }
qdict_del(parent_options, bdref_key);
@@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
+ BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
- return bdrv_attach_child(parent, bs, bdref_key, child_role);
+ c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
+ if (!c) {
+ bdrv_unref(bs);
+ return NULL;
+ }
+
+ return c;
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot;
+ Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
* call bdrv_unref() on it), so in order to be able to return one, we have
* to increase bs_snapshot's refcount here */
bdrv_ref(bs_snapshot);
- bdrv_append(bs_snapshot, bs);
+ bdrv_append(bs_snapshot, bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto out;
+ }
g_free(tmp_filename);
return bs_snapshot;
@@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file_bs != NULL) {
- file = blk_new();
- blk_insert_bs(file, file_bs);
+ file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+ blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
+ if (local_err) {
+ goto fail;
+ }
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
@@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv->bdrv_close(bs);
bs->drv = NULL;
- bdrv_set_backing_hd(bs, NULL);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
@@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from,
BdrvChild *c, *next, *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ if (c->role->stay_at_node) {
+ continue;
+ }
if (c->role == &child_backing) {
- /* @from is generally not allowed to be a backing file, except for
- * when @to is the overlay. In that case, @from may not be replaced
- * by @to as @to's backing node. */
+ /* If @from is a backing file of @to, ignore the child to avoid
+ * creating a loop. We only want to change the pointer of other
+ * parents. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
@@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from,
}
}
- assert(c->role != &child_backing);
bdrv_ref(to);
- bdrv_replace_child(c, to);
+ /* FIXME Are we sure that bdrv_replace_child() can't run into
+ * &error_abort because of permissions? */
+ bdrv_replace_child(c, to, true);
bdrv_unref(from);
}
}
@@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from,
* parents of bs_top after bdrv_append() returns. If the caller needs to keep a
* reference of its own, it must call bdrv_ref().
*/
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp)
{
- assert(!bdrv_requests_pending(bs_top));
- assert(!bdrv_requests_pending(bs_new));
+ Error *local_err = NULL;
- bdrv_ref(bs_top);
+ assert(!atomic_read(&bs_top->in_flight));
+ assert(!atomic_read(&bs_new->in_flight));
+
+ bdrv_set_backing_hd(bs_new, bs_top, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
change_parent_backing_link(bs_top, bs_new);
- bdrv_set_backing_hd(bs_new, bs_top);
- bdrv_unref(bs_top);
/* bs_new is now referenced by its new parents, we don't need the
* additional reference any more. */
+out:
bdrv_unref(bs_new);
}
@@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
BlockDriverState *new_top_bs = NULL;
+ Error *local_err = NULL;
int ret = -EIO;
if (!top->drv || !base->drv) {
@@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
if (ret) {
goto exit;
}
- bdrv_set_backing_hd(new_top_bs, base);
+
+ bdrv_set_backing_hd(new_top_bs, base, &local_err);
+ if (local_err) {
+ ret = -EPERM;
+ error_report_err(local_err);
+ goto exit;
+ }
ret = 0;
exit:
@@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
+
+ assert(child->perm & BLK_PERM_RESIZE);
+
if (!drv)
return -ENOMEDIUM;
if (!drv->bdrv_truncate)
diff --git a/block/backup.c b/block/backup.c
index fe010e7..d1ab617 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
- job = block_job_create(job_id, &backup_job_driver, bs, speed,
- creation_flags, cb, opaque, errp);
+ /* job->common.len is fixed, so we can't allow resize */
+ job = block_job_create(job_id, &backup_job_driver, bs,
+ BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
+ speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
- job->target = blk_new();
- blk_insert_bs(job->target, target);
+ /* The target must match the source in size, so no resize here either */
+ job->target = blk_new(BLK_PERM_WRITE,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
+ ret = blk_insert_bs(job->target, target, errp);
+ if (ret < 0) {
+ goto error;
+ }
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
@@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
- block_job_add_bdrv(&job->common, target);
+ /* Required permissions are already taken with target's blk_new() */
+ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
+ &error_abort);
job->common.len = len;
block_job_txn_add_job(txn, &job->common);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 6117ce5..67e8024 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_reopen_prepare = blkdebug_reopen_prepare,
+ .bdrv_child_perm = bdrv_filter_default_perms,
+
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
diff --git a/block/blkreplay.c b/block/blkreplay.c
index cfc8c5b..e110211 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_file_open = blkreplay_open,
.bdrv_close = blkreplay_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_preadv = blkreplay_co_preadv,
diff --git a/block/blkverify.c b/block/blkverify.c
index 43a940c..9a1e21c 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,
diff --git a/block/block-backend.c b/block/block-backend.c
index 492e71e..daa7908 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -59,6 +59,9 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ uint64_t perm;
+ uint64_t shared_perm;
+
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+ char *dev_id;
+
+ if (blk->name) {
+ return g_strdup(blk->name);
+ }
+
+ dev_id = blk_get_attached_dev_id(blk);
+ if (*dev_id) {
+ return dev_id;
+ } else {
+ /* TODO Callback into the BB owner for something more detailed */
+ g_free(dev_id);
+ return g_strdup("a block device");
+ }
+}
+
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
+ .get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
/*
* Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
+ uint64_t perm;
+
+ /* blk_new_open() is mainly used in .bdrv_create implementations and the
+ * tools where sharing isn't a concern because the BDS stays private, so we
+ * just request permission according to the flags.
+ *
+ * The exceptions are xen_disk and blockdev_init(); in these cases, the
+ * caller of blk_new_open() doesn't make use of the permissions, but they
+ * shouldn't hurt either. We can still share everything here because the
+ * guest devices will add their own blockers if they can't share. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (flags & BDRV_O_RDWR) {
+ perm |= BLK_PERM_WRITE;
+ }
+ if (flags & BDRV_O_RESIZE) {
+ perm |= BLK_PERM_RESIZE;
+ }
- blk = blk_new();
+ blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ perm, BLK_PERM_ALL, blk, &error_abort);
return blk;
}
@@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*/
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ blk->perm, blk->shared_perm, blk, errp);
+ if (blk->root == NULL) {
+ return -EPERM;
+ }
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
+
+ return 0;
+}
+
+/*
+ * Sets the permission bitmasks that the user of the BlockBackend needs.
+ */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
+{
+ int ret;
+
+ if (blk->root) {
+ ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
+
+ return 0;
+}
+
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+ *perm = blk->perm;
+ *shared_perm = blk->shared_perm;
}
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
+ blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
/*
* Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded. @errp is set then.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
+ Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
- blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+ blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+ if (local_err) {
+ assert(load == true);
+ error_propagate(errp, local_err);
+ return;
+ }
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
@@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
static void blk_root_change_media(BdrvChild *child, bool load)
{
- blk_dev_change_media_cb(child->opaque, load);
+ blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*
diff --git a/block/bochs.c b/block/bochs.c
index 7dd2ac4..516da56 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
diff --git a/block/cloop.c b/block/cloop.c
index 877c9b0..a6c7b9d 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
diff --git a/block/commit.c b/block/commit.c
index c284e85..22a0a4d 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
+ BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
@@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque)
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
- BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
+ BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
+ bool remove_commit_top_bs = false;
+
+ /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
+ * the normal backing chain can be restored. */
+ blk_unref(s->base);
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
- ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+ ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+ s->backing_file_str);
+ } else if (overlay_bs) {
+ /* XXX Can (or should) we somehow keep 'consistent read' blocked even
+ * after the failed/cancelled commit job is gone? If we already wrote
+ * something to base, the intermediate images aren't valid any more. */
+ remove_commit_top_bs = true;
}
/* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
- blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
+
+ /* If bdrv_drop_intermediate() didn't already do that, remove the commit
+ * filter driver from the backing chain. Do this as the final step so that
+ * the 'consistent read' permission can be granted. */
+ if (remove_commit_top_bs) {
+ bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ }
}
static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = {
.start = commit_run,
};
+static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static void bdrv_commit_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ *nperm = 0;
+ *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_commit_top = {
+ .format_name = "commit_top",
+ .bdrv_co_preadv = bdrv_commit_top_preadv,
+ .bdrv_close = bdrv_commit_top_close,
+ .bdrv_child_perm = bdrv_commit_top_child_perm,
+};
+
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
- Error **errp)
+ const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
+ BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
+ int ret;
assert(top != bs);
if (top == base) {
@@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
- s = block_job_create(job_id, &commit_job_driver, bs, speed,
- BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+ s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
+ speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
@@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs,
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
- block_job_unref(&s->common);
- return;
+ goto fail;
}
}
+ /* Insert commit_top block node above top, so we can block consistent read
+ * on the backing chain below it */
+ commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
+ errp);
+ if (commit_top_bs == NULL) {
+ goto fail;
+ }
+
+ bdrv_set_backing_hd(commit_top_bs, top, &error_abort);
+ bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort);
+
+ s->commit_top_bs = commit_top_bs;
+ bdrv_unref(commit_top_bs);
/* Block all nodes between top and base, because they will
* disappear from the chain after this operation. */
assert(bdrv_chain_contains(top, base));
- for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ for (iter = top; iter != base; iter = backing_bs(iter)) {
+ /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
+ * at s->base (if writes are blocked for a node, they are also blocked
+ * for its backing file). The other options would be a second filter
+ * driver above s->base. */
+ ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+ errp);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ goto fail;
}
+
/* overlay_bs must be blocked because it needs to be modified to
- * update the backing image string, but if it's the root node then
- * don't block it again */
- if (bs != overlay_bs) {
- block_job_add_bdrv(&s->common, overlay_bs);
+ * update the backing image string. */
+ ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
+ BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ goto fail;
}
- s->base = blk_new();
- blk_insert_bs(s->base, base);
+ s->base = blk_new(BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_RESIZE,
+ BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_GRAPH_MOD
+ | BLK_PERM_WRITE_UNCHANGED);
+ ret = blk_insert_bs(s->base, base, errp);
+ if (ret < 0) {
+ goto fail;
+ }
- s->top = blk_new();
- blk_insert_bs(s->top, top);
+ /* Required permissions are already taken with block_job_add_bdrv() */
+ s->top = blk_new(0, BLK_PERM_ALL);
+ blk_insert_bs(s->top, top, errp);
+ if (ret < 0) {
+ goto fail;
+ }
s->active = bs;
@@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
trace_commit_start(bs, base, top, s);
block_job_start(&s->common);
+ return;
+
+fail:
+ if (s->base) {
+ blk_unref(s->base);
+ }
+ if (s->top) {
+ blk_unref(s->top);
+ }
+ if (commit_top_bs) {
+ bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ }
+ block_job_unref(&s->common);
}
@@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
+ BlockDriverState *backing_file_bs = NULL;
+ BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
+ Error *local_err = NULL;
if (!drv)
return -ENOMEDIUM;
@@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs)
}
}
- src = blk_new();
- blk_insert_bs(src, bs);
+ src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+ backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
- backing = blk_new();
- blk_insert_bs(backing, bs->backing->bs);
+ ret = blk_insert_bs(src, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
+
+ /* Insert commit_top block node above backing, so we can write to it */
+ backing_file_bs = backing_bs(bs);
+
+ commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
+ &local_err);
+ if (commit_top_bs == NULL) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
+
+ bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
+ bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
+
+ ret = blk_insert_bs(backing, backing_file_bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
length = blk_getlength(src);
if (length < 0) {
@@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs)
ro_cleanup:
qemu_vfree(buf);
- blk_unref(src);
blk_unref(backing);
+ if (backing_file_bs) {
+ bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
+ }
+ bdrv_unref(commit_top_bs);
+ blk_unref(src);
if (ro) {
/* ignoring error return here */
diff --git a/block/crypto.c b/block/crypto.c
index 7cb2ff2..4a20388 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_probe = block_crypto_probe_luks,
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = block_crypto_create_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,
diff --git a/block/dmg.c b/block/dmg.c
index 8e387cd..a7d25fc 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = {
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};
diff --git a/block/io.c b/block/io.c
index d5c4544..8f38d46 100644
--- a/block/io.c
+++ b/block/io.c
@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
+ BlockDriverState *bs = child->bs;
+
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
size_t skip_bytes;
int ret;
+ assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
@@ -1001,10 +1005,11 @@ err:
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
+ BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
+ ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
}
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
}
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
- ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
+ BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
bool waited;
int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+ assert(child->perm & BLK_PERM_WRITE);
+ assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
return ret;
}
-static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
unsigned int bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
+ BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
- ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
+ ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(bs, req, offset, align,
+ ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
+ ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
- ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+ ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
- align, &tail_qiov, 0);
+ ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
+ align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bytes = ROUND_UP(bytes, align);
}
- ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
+ ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
diff --git a/block/mirror.c b/block/mirror.c
index 1b34b36..57f26c3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -38,7 +38,10 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
+ BlockDriverState *mirror_top_bs;
+ BlockDriverState *source;
BlockDriverState *base;
+
/* The name of the graph node to replace */
char *replaces;
/* The BDS to replace */
@@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
- BlockDriverState *source = blk_bs(s->common.blk);
+ BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
@@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
- BlockDriverState *src = blk_bs(s->common.blk);
+ BlockDriverState *src = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
+ BlockDriverState *mirror_top_bs = s->mirror_top_bs;
+ Error *local_err = NULL;
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
+ bdrv_ref(mirror_top_bs);
+
+ /* We don't access the source any more. Dropping any WRITE/RESIZE is
+ * required before it could become a backing file of target_bs. */
+ bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+ &error_abort);
+ if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ if (backing_bs(target_bs) != backing) {
+ bdrv_set_backing_hd(target_bs, backing, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ data->ret = -EPERM;
+ }
+ }
+ }
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
bdrv_drained_begin(target_bs);
bdrv_replace_in_backing_chain(to_replace, target_bs);
bdrv_drained_end(target_bs);
-
- /* We just changed the BDS the job BB refers to */
- blk_remove_bs(job->blk);
- blk_insert_bs(job->blk, src);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque)
g_free(s->replaces);
blk_unref(s->target);
s->target = NULL;
+
+ /* Remove the mirror filter driver from the graph. Before this, get rid of
+ * the blockers on the intermediate nodes so that the resulting state is
+ * valid. */
+ block_job_remove_all_bdrv(job);
+ bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
+
+ /* We just changed the BDS the job BB refers to (with either or both of the
+ * bdrv_replace_in_backing_chain() calls), so switch the BB back so the
+ * cleanup does the right thing. We don't need any permissions any more
+ * now. */
+ blk_remove_bs(job->blk);
+ blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
+ blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
+
block_job_completed(&s->common, data->ret);
+
g_free(data);
bdrv_drained_end(src);
+ bdrv_unref(mirror_top_bs);
bdrv_unref(src);
}
@@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
- BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
@@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- BlockDriverState *src, *target;
+ BlockDriverState *target;
- src = blk_bs(job->blk);
target = blk_bs(s->target);
if (!s->synced) {
@@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
replace_aio_context = bdrv_get_aio_context(s->to_replace);
aio_context_acquire(replace_aio_context);
+ /* TODO Translate this into permission system. Current definition of
+ * GRAPH_MOD would require to request it for the parents; they might
+ * not even be BlockDriverStates, however, so a BdrvChild can't address
+ * them. May need redefinition of GRAPH_MOD. */
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
- if (backing_bs(target) != backing) {
- bdrv_set_backing_hd(target, backing);
- }
- }
-
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
+static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+{
+ return bdrv_co_flush(bs->backing->bs);
+}
+
+static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
+ BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ *pnum = nb_sectors;
+ *file = bs->backing->bs;
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+ (sector_num << BDRV_SECTOR_BITS);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ return bdrv_co_pdiscard(bs->backing->bs, offset, count);
+}
+
+static void bdrv_mirror_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ /* Must be able to forward guest writes to the real image */
+ *nperm = 0;
+ if (perm & BLK_PERM_WRITE) {
+ *nperm |= BLK_PERM_WRITE;
+ }
+
+ *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_mirror_top = {
+ .format_name = "mirror_top",
+ .bdrv_co_preadv = bdrv_mirror_top_preadv,
+ .bdrv_co_pwritev = bdrv_mirror_top_pwritev,
+ .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
+ .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
+ .bdrv_co_flush = bdrv_mirror_top_flush,
+ .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status,
+ .bdrv_close = bdrv_mirror_top_close,
+ .bdrv_child_perm = bdrv_mirror_top_child_perm,
+};
+
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
@@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
- bool auto_complete)
+ bool auto_complete, const char *filter_node_name)
{
MirrorBlockJob *s;
+ BlockDriverState *mirror_top_bs;
+ bool target_graph_mod;
+ bool target_is_backing;
+ Error *local_err = NULL;
+ int ret;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
- s = block_job_create(job_id, driver, bs, speed, creation_flags,
- cb, opaque, errp);
- if (!s) {
+ /* In the case of active commit, add dummy driver to provide consistent
+ * reads on the top, while disabling it in the intermediate nodes, and make
+ * the backing chain writable. */
+ mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
+ BDRV_O_RDWR, errp);
+ if (mirror_top_bs == NULL) {
+ return;
+ }
+ mirror_top_bs->total_sectors = bs->total_sectors;
+
+ /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
+ * it alive until block_job_create() even if bs has no parent. */
+ bdrv_ref(mirror_top_bs);
+ bdrv_drained_begin(bs);
+ bdrv_append(mirror_top_bs, bs, &local_err);
+ bdrv_drained_end(bs);
+
+ if (local_err) {
+ bdrv_unref(mirror_top_bs);
+ error_propagate(errp, local_err);
return;
}
- s->target = blk_new();
- blk_insert_bs(s->target, target);
+ /* Make sure that the source is not resized while the job is running */
+ s = block_job_create(job_id, driver, mirror_top_bs,
+ BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
+ creation_flags, cb, opaque, errp);
+ bdrv_unref(mirror_top_bs);
+ if (!s) {
+ goto fail;
+ }
+ s->source = bs;
+ s->mirror_top_bs = mirror_top_bs;
+
+ /* No resize for the target either; while the mirror is still running, a
+ * consistent read isn't necessarily possible. We could possibly allow
+ * writes and graph modifications, though it would likely defeat the
+ * purpose of a mirror, so leave them blocked for now.
+ *
+ * In the case of active commit, things look a bit different, though,
+ * because the target is an already populated backing file in active use.
+ * We can allow anything except resize there.*/
+ target_is_backing = bdrv_chain_contains(bs, target);
+ target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+ s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+ (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
+ BLK_PERM_WRITE_UNCHANGED |
+ (target_is_backing ? BLK_PERM_CONSISTENT_READ |
+ BLK_PERM_WRITE |
+ BLK_PERM_GRAPH_MOD : 0));
+ ret = blk_insert_bs(s->target, target, errp);
+ if (ret < 0) {
+ goto fail;
+ }
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
@@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
return;
}
- block_job_add_bdrv(&s->common, target);
+ /* Required permissions are already taken with blk_new() */
+ block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
+ &error_abort);
+
/* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */
- if (bdrv_chain_contains(bs, target)) {
+ if (target_is_backing) {
BlockDriverState *iter;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
+ * ourselves at s->base (if writes are blocked for a node, they are
+ * also blocked for its backing file). The other options would be a
+ * second filter driver above s->base (== target). */
+ ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+ errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
}
trace_mirror_start(bs, s, opaque);
block_job_start(&s->common);
+ return;
+
+fail:
+ if (s) {
+ g_free(s->replaces);
+ blk_unref(s->target);
+ block_job_unref(&s->common);
+ }
+
+ bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
}
void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, Error **errp)
+ bool unmap, const char *filter_node_name, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL, errp,
- &mirror_job_driver, is_none_mode, base, false);
+ &mirror_job_driver, is_none_mode, base, false,
+ filter_node_name);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
+ const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
@@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
- &commit_active_job_driver, false, base, auto_complete);
+ &commit_active_job_driver, false, base, auto_complete,
+ filter_node_name);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;
diff --git a/block/parallels.c b/block/parallels.c
index b2ec09f..19935e2 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
}
file = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_get_block_status = parallels_co_get_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,
diff --git a/block/qcow.c b/block/qcow.c
index 038b05a..9d6ac83 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
qcow_blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
diff --git a/block/qcow2.c b/block/qcow2.c
index 21e6142..6a92d2e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
blk = blk_new_open(filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
uint64_t cluster_size = s->cluster_size;
bool encrypt;
int refcount_bits = s->refcount_bits;
+ Error *local_err = NULL;
int ret;
QemuOptDesc *desc = opts->list->desc;
Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
- BlockBackend *blk = blk_new();
- blk_insert_bs(blk, bs);
+ BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ blk_unref(blk);
+ return ret;
+ }
+
ret = blk_truncate(blk, new_size);
blk_unref(blk);
-
if (ret < 0) {
return ret;
}
@@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
diff --git a/block/qed.c b/block/qed.c
index 62a0a09..5ec7fd8 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
diff --git a/block/quorum.c b/block/quorum.c
index 86e2072..40205fb 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
/* We can safely add the child now */
bdrv_ref(child_bs);
- child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
+ if (child == NULL) {
+ s->next_child_index--;
+ bdrv_unref(child_bs);
+ goto out;
+ }
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
+out:
bdrv_drained_end(bs);
}
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
+ .bdrv_child_perm = bdrv_filter_default_perms,
+
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
diff --git a/block/raw-format.c b/block/raw-format.c
index ce34d1b..86fbc65 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -467,6 +467,7 @@ BlockDriver bdrv_raw = {
.bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_create = &raw_create,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,
diff --git a/block/replication.c b/block/replication.c
index eff85c7..22f170f 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
s->replication_state = BLOCK_REPLICATION_FAILOVER;
commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
- replication_done, bs, errp, true);
+ NULL, replication_done, bs, errp, true);
break;
default:
aio_context_release(aio_context);
@@ -660,6 +660,7 @@ BlockDriver bdrv_replication = {
.bdrv_open = replication_open,
.bdrv_close = replication_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = replication_getlength,
.bdrv_co_readv = replication_co_readv,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 860ba61..7434710 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
diff --git a/block/stream.c b/block/stream.c
index 1523ba7..0113710 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
StreamCompleteData *data = opaque;
BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
+ Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
- bdrv_set_backing_hd(bs, base);
+ bdrv_set_backing_hd(bs, base, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ data->ret = -EPERM;
+ goto out;
+ }
}
+out:
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
+ /* Give up write permissions before making it read-only */
+ blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
bdrv_reopen(bs, s->bs_flags, NULL);
}
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter;
int orig_bs_flags;
- s = block_job_create(job_id, &stream_job_driver, bs, speed,
- BLOCK_JOB_DEFAULT, NULL, NULL, errp);
- if (!s) {
- return;
- }
-
/* Make sure that the image is opened in read-write mode */
orig_bs_flags = bdrv_get_flags(bs);
if (!(orig_bs_flags & BDRV_O_RDWR)) {
if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
- block_job_unref(&s->common);
return;
}
}
- /* Block all intermediate nodes between bs and base, because they
- * will disappear from the chain after this operation */
+ /* Prevent concurrent jobs trying to modify the graph structure here, we
+ * already have our own plans. Also don't allow resize as the image size is
+ * queried only at the job start and then cached. */
+ s = block_job_create(job_id, &stream_job_driver, bs,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_GRAPH_MOD,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE,
+ speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+ if (!s) {
+ goto fail;
+ }
+
+ /* Block all intermediate nodes between bs and base, because they will
+ * disappear from the chain after this operation. The streaming job reads
+ * every block only once, assuming that it doesn't change, so block writes
+ * and resizes. */
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
+ &error_abort);
}
s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->on_error = on_error;
trace_stream_start(bs, base, s);
block_job_start(&s->common);
+ return;
+
+fail:
+ if (orig_bs_flags != bdrv_get_flags(bs)) {
+ bdrv_reopen(bs, s->bs_flags, NULL);
+ }
}
diff --git a/block/vdi.c b/block/vdi.c
index 18b4773..9b4f70e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,
diff --git a/block/vhdx.c b/block/vhdx.c
index 9918ee9..052a753 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
.bdrv_create = vhdx_create,
diff --git a/block/vmdk.c b/block/vmdk.c
index 9d68ec5..a9bd22b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
new_blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
diff --git a/block/vpc.c b/block/vpc.c
index d0df2a1..f591d4b 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vpc_create,
.bdrv_co_preadv = vpc_co_preadv,
diff --git a/block/vvfat.c b/block/vvfat.c
index 7f230be..aa61c32 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
&error_abort);
*(void**) backing->opaque = s;
- bdrv_set_backing_hd(s->bs, backing);
+ bdrv_set_backing_hd(s->bs, backing, &error_abort);
bdrv_unref(backing);
return 0;
@@ -3052,6 +3052,27 @@ err:
return ret;
}
+static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ BDRVVVFATState *s = bs->opaque;
+
+ assert(c == s->qcow || role == &child_backing);
+
+ if (c == s->qcow) {
+ /* This is a private node, nobody should try to attach to it */
+ *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+ *nshared = BLK_PERM_WRITE_UNCHANGED;
+ } else {
+ /* The backing file is there so 'commit' can use it. vvfat doesn't
+ * access it in any way. */
+ *nperm = 0;
+ *nshared = BLK_PERM_ALL;
+ }
+}
+
static void vvfat_close(BlockDriverState *bs)
{
BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
+ .bdrv_child_perm = vvfat_child_perm,
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,
diff --git a/blockdev.c b/blockdev.c
index 8682bd8..8eb4e84 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
- blk = blk_new();
+ blk = blk_new(0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
@@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common,
if (!state->new_bs->drv->supports_backing) {
error_setg(errp, "The snapshot does not support backing images");
+ return;
+ }
+
+ /* This removes our old bs and adds the new bs. This is an operation that
+ * can fail, so we need to do it in .prepare; undoing it for abort is
+ * always possible. */
+ bdrv_ref(state->new_bs);
+ bdrv_append(state->new_bs, state->old_bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
}
}
@@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common)
bdrv_set_aio_context(state->new_bs, state->aio_context);
- /* This removes our old bs and adds the new bs */
- bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
@@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
- bdrv_unref(state->new_bs);
+ if (state->new_bs->backing) {
+ bdrv_replace_in_backing_chain(state->new_bs, state->old_bs);
+ }
}
}
@@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common)
if (state->aio_context) {
bdrv_drained_end(state->old_bs);
aio_context_release(state->aio_context);
+ bdrv_unref(state->new_bs);
}
}
@@ -2311,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
}
if (!locked || force) {
- blk_dev_change_media_cb(blk, false);
+ blk_dev_change_media_cb(blk, false, &error_abort);
}
if (locked && !force) {
@@ -2349,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
Error **errp)
{
BlockBackend *blk;
+ Error *local_err = NULL;
device = has_device ? device : NULL;
id = has_id ? id : NULL;
@@ -2372,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
return;
}
- blk_dev_change_media_cb(blk, true);
+ blk_dev_change_media_cb(blk, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2425,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
* called at all); therefore, the medium needs to be ejected here.
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. false). */
- blk_dev_change_media_cb(blk, false);
+ blk_dev_change_media_cb(blk, false, &error_abort);
}
out:
@@ -2435,7 +2452,9 @@ out:
static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
BlockDriverState *bs, Error **errp)
{
+ Error *local_err = NULL;
bool has_device;
+ int ret;
/* For BBs without a device, we can exchange the BDS tree at will */
has_device = blk_get_attached_dev(blk);
@@ -2455,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
return;
}
- blk_insert_bs(blk, bs);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ return;
+ }
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2463,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
* slot here.
* Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. true). */
- blk_dev_change_media_cb(blk, true);
+ blk_dev_change_media_cb(blk, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ blk_remove_bs(blk);
+ return;
+ }
}
}
@@ -2890,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto out;
+ }
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
@@ -3014,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
+ bool has_filter_node_name, const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3029,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_speed) {
speed = 0;
}
+ if (!has_filter_node_name) {
+ filter_node_name = NULL;
+ }
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3103,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
- BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
- &local_err, false);
+ BLOCK_JOB_DEFAULT, speed, on_error,
+ filter_node_name, NULL, NULL, &local_err, false);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3112,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
- &local_err);
+ filter_node_name, &local_err);
}
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3348,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
+ bool has_filter_node_name,
+ const char *filter_node_name,
Error **errp)
{
@@ -3369,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_unmap) {
unmap = true;
}
+ if (!has_filter_node_name) {
+ filter_node_name = NULL;
+ }
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3398,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
- on_source_error, on_target_error, unmap, errp);
+ on_source_error, on_target_error, unmap, filter_node_name,
+ errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3536,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
+ false, NULL,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3554,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
+ bool has_filter_node_name,
+ const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3585,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
true, true,
+ has_filter_node_name, filter_node_name,
&local_err);
error_propagate(errp, local_err);
diff --git a/blockjob.c b/blockjob.c
index abee11b..69126af 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -55,6 +55,19 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+static char *child_job_get_parent_desc(BdrvChild *c)
+{
+ BlockJob *job = c->opaque;
+ return g_strdup_printf("%s job '%s'",
+ BlockJobType_lookup[job->driver->job_type],
+ job->id);
+}
+
+static const BdrvChildRole child_job = {
+ .get_parent_desc = child_job_get_parent_desc,
+ .stay_at_node = true,
+};
+
BlockJob *block_job_next(BlockJob *job)
{
if (!job) {
@@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+void block_job_remove_all_bdrv(BlockJob *job)
+{
+ GSList *l;
+ for (l = job->nodes; l; l = l->next) {
+ BdrvChild *c = l->data;
+ bdrv_op_unblock_all(c->bs, job->blocker);
+ bdrv_root_unref_child(c);
+ }
+ g_slist_free(job->nodes);
+ job->nodes = NULL;
+}
+
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+ uint64_t perm, uint64_t shared_perm, Error **errp)
{
- job->nodes = g_slist_prepend(job->nodes, bs);
+ BdrvChild *c;
+
+ c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
+ job, errp);
+ if (c == NULL) {
+ return -EPERM;
+ }
+
+ job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
+
+ return 0;
}
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
- BlockDriverState *bs, int64_t speed, int flags,
+ BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
+ int ret;
if (bs->job) {
error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
}
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(perm, shared_perm);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ blk_unref(blk);
+ return NULL;
+ }
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
- block_job_add_bdrv(job, bs);
+ block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
@@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job)
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
- GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
- for (l = job->nodes; l; l = l->next) {
- bs = l->data;
- bdrv_op_unblock_all(bs, job->blocker);
- bdrv_unref(bs);
- }
- g_slist_free(job->nodes);
+ block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index fdf4089..1e3bd2b 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y
CONFIG_A9MPCORE=y
CONFIG_A15MPCORE=y
+CONFIG_ARM_V7M=y
+
CONFIG_ARM_GIC=y
CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
CONFIG_ARM_TIMER=y
diff --git a/docs/mach-virt-graphical.cfg b/docs/mach-virt-graphical.cfg
new file mode 100644
index 0000000..0fdf684
--- /dev/null
+++ b/docs/mach-virt-graphical.cfg
@@ -0,0 +1,281 @@
+# mach-virt - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-aarch64 \
+# -nodefaults \
+# -readconfig mach-virt-graphical.cfg \
+# -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+# 00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:01.0 Display controller
+# 00.1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+# 03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+ type = "virt"
+ accel = "kvm"
+ gic-version = "host"
+
+[memory]
+ size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+# edk2-aarch64 (pkg)
+# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
+# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
+#
+# RHEL
+# AAVMF (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+#
+# Debian/Ubuntu
+# qemu-efi (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+
+[drive "uefi-binary"]
+ file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "0"
+ readonly = "on"
+
+[drive "uefi-varstore"]
+ file = "guest_VARS.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB keyboard / USB tablet combo so that graphical
+# guests can be controlled appropriately.
+
+[device "usb"]
+ driver = "nec-usb-xhci"
+ bus = "pcie.3"
+ addr = "00.0"
+
+[device "keyboard"]
+ driver = "usb-kbd"
+ bus = "usb.0"
+
+[device "tablet"]
+ driver = "usb-tablet"
+ bus = "usb.0"
+
+
+# Display controller
+# =========================================================
+#
+# We use virtio-gpu because the legacy VGA framebuffer is
+# very troublesome on aarch64, and virtio-gpu is the only
+# video device that doesn't implement it.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "virtio-gpu"
+ bus = "pcie.0"
+ addr = "01.0"
diff --git a/docs/mach-virt-serial.cfg b/docs/mach-virt-serial.cfg
new file mode 100644
index 0000000..aee9f1c
--- /dev/null
+++ b/docs/mach-virt-serial.cfg
@@ -0,0 +1,243 @@
+# mach-virt - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-aarch64 \
+# -nodefaults \
+# -readconfig mach-virt-serial.cfg \
+# -display none -serial mon:stdio \
+# -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+# 00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00.1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+ type = "virt"
+ accel = "kvm"
+ gic-version = "host"
+
+[memory]
+ size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+# edk2-aarch64 (pkg)
+# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
+# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
+#
+# RHEL
+# AAVMF (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+#
+# Debian/Ubuntu
+# qemu-efi (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+
+[drive "uefi-binary"]
+ file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "0"
+ readonly = "on"
+
+[drive "uefi-varstore"]
+ file = "guest_VARS.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
diff --git a/docs/migration.txt b/docs/migration.txt
index 6503c17..1b940a8 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -161,6 +161,11 @@ include/hw/hw.h.
=== More about versions ===
+Version numbers are intended for major incompatible changes to the
+migration of a device, and using them breaks backwards-migration
+compatibility; in general most changes can be made by adding Subsections
+(see below) or _TEST macros (see below) which won't break compatibility.
+
You can see that there are several version fields:
- version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
+Saving state will always create a section with the 'version_id' value
+and thus can't be loaded by any older QEMU.
+
=== Massaging functions ===
Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
not enabled, the values on that fields are garbage and don't need to
be sent.
+Using a condition function that checks a 'property' to determine whether
+to send a subsection allows backwards migration compatibility when
+new subsections are added.
+
+For example;
+ a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
+ default it to true.
+ b) Add an entry to the HW_COMPAT_ for the previous version
+ that sets the property to false.
+ c) Add a static bool support_foo function that tests the property.
+ d) Add a subsection with a .needed set to the support_foo function
+ e) (potentially) Add a pre_load that sets up a default value for 'foo'
+ to be used if the subsection isn't loaded.
+
+Now that subsection will not be generated when using an older
+machine type and the migration stream will be accepted by older
+QEMU versions. pre-load functions can be used to initialise state
+on the newer version so that they default to suitable values
+when loading streams created by older QEMU versions that do not
+generate the subsection.
+
+In some cases subsections are added for data that had been accidentally
+omitted by earlier versions; if the missing data causes the migration
+process to succeed but the guest to behave badly then it may be better
+to send the subsection and cause the migration to explicitly fail
+with the unknown subsection error. If the bad behaviour only happens
+with certain data values, making the subsection conditional on
+the data value (rather than the machine type) allows migrations to succeed
+in most cases. In general the preference is to tie the subsection to
+the machine type, and allow reliable migrations, unless the behaviour
+from omission of the subsection is really bad.
+
+= Not sending existing elements =
+
+Sometimes members of the VMState are no longer needed;
+ removing them will break migration compatibility
+ making them version dependent and bumping the version will break backwards
+ migration compatibility.
+
+The best way is to:
+ a) Add a new property/compatibility/function in the same way for subsections
+ above.
+ b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
+ VMSTATE_UINT32(foo, barstruct)
+ becomes
+ VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
+
+ Sometime in the future when we no longer care about the ancient
+versions these can be killed off.
+
= Return path =
In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
+=== Postcopy with hugepages ===
+
+Postcopy now works with hugetlbfs backed memory:
+ a) The linux kernel on the destination must support userfault on hugepages.
+ b) The huge-page configuration on the source and destination VMs must be
+ identical; i.e. RAMBlocks on both sides must use the same page size.
+ c) Note that -mem-path /dev/hugepages will fall back to allocating normal
+ RAM if it doesn't have enough hugepages, triggering (b) to fail.
+ Using -mem-prealloc enforces the allocation using hugepages.
+ d) Care should be taken with the size of hugepage used; postcopy with 2MB
+ hugepages works well, however 1GB hugepages are likely to be problematic
+ since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
+ and until the full page is transferred the destination thread is blocked.
diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg
deleted file mode 100644
index e4ddb7d..0000000
--- a/docs/q35-chipset.cfg
+++ /dev/null
@@ -1,152 +0,0 @@
-################################################################
-#
-# qemu -M q35 creates a bare machine with just the very essential
-# chipset devices being present:
-#
-# 00.0 - Host bridge
-# 1f.0 - ISA bridge / LPC
-# 1f.2 - SATA (AHCI) controller
-# 1f.3 - SMBus controller
-#
-# This config file documents the other devices and how they are
-# created. You can simply use "-readconfig $thisfile" to create
-# them all. Here is a overview:
-#
-# 19.0 - Ethernet controller (not created, our e1000 emulation
-# doesn't emulate the ich9 device).
-# 1a.* - USB Controller #2 (ehci + uhci companions)
-# 1b.0 - HD Audio Controller
-# 1c.* - PCI Express Ports
-# 1d.* - USB Controller #1 (ehci + uhci companions,
-# "qemu -M q35 -usb" creates these too)
-# 1e.0 - PCI Bridge
-#
-
-[device "ich9-ehci-2"]
- driver = "ich9-usb-ehci2"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.7"
-
-[device "ich9-uhci-4"]
- driver = "ich9-usb-uhci4"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.0"
- masterbus = "ich9-ehci-2.0"
- firstport = "0"
-
-[device "ich9-uhci-5"]
- driver = "ich9-usb-uhci5"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.1"
- masterbus = "ich9-ehci-2.0"
- firstport = "2"
-
-[device "ich9-uhci-6"]
- driver = "ich9-usb-uhci6"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.2"
- masterbus = "ich9-ehci-2.0"
- firstport = "4"
-
-
-[device "ich9-hda-audio"]
- driver = "ich9-intel-hda"
- bus = "pcie.0"
- addr = "1b.0"
-
-
-[device "ich9-pcie-port-1"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.0"
- port = "1"
- chassis = "1"
-
-[device "ich9-pcie-port-2"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.1"
- port = "2"
- chassis = "2"
-
-[device "ich9-pcie-port-3"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.2"
- port = "3"
- chassis = "3"
-
-[device "ich9-pcie-port-4"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.3"
- port = "4"
- chassis = "4"
-
-##
-# Example PCIe switch with two downstream ports
-#
-#[device "pcie-switch-upstream-port-1"]
-# driver = "x3130-upstream"
-# bus = "ich9-pcie-port-4"
-# addr = "00.0"
-#
-#[device "pcie-switch-downstream-port-1-1"]
-# driver = "xio3130-downstream"
-# multifunction = "on"
-# bus = "pcie-switch-upstream-port-1"
-# addr = "00.0"
-# port = "1"
-# chassis = "5"
-#
-#[device "pcie-switch-downstream-port-1-2"]
-# driver = "xio3130-downstream"
-# multifunction = "on"
-# bus = "pcie-switch-upstream-port-1"
-# addr = "00.1"
-# port = "1"
-# chassis = "6"
-
-[device "ich9-ehci-1"]
- driver = "ich9-usb-ehci1"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.7"
-
-[device "ich9-uhci-1"]
- driver = "ich9-usb-uhci1"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.0"
- masterbus = "ich9-ehci-1.0"
- firstport = "0"
-
-[device "ich9-uhci-2"]
- driver = "ich9-usb-uhci2"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.1"
- masterbus = "ich9-ehci-1.0"
- firstport = "2"
-
-[device "ich9-uhci-3"]
- driver = "ich9-usb-uhci3"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.2"
- masterbus = "ich9-ehci-1.0"
- firstport = "4"
-
-
-[device "ich9-pci-bridge"]
- driver = "i82801b11-bridge"
- bus = "pcie.0"
- addr = "1e.0"
diff --git a/docs/q35-emulated.cfg b/docs/q35-emulated.cfg
new file mode 100644
index 0000000..c6416d6
--- /dev/null
+++ b/docs/q35-emulated.cfg
@@ -0,0 +1,288 @@
+# q35 - Emulated guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-emulated.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of emulated devices that
+# closely resembles that of a physical machine, and will be
+# accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of devices that
+# are pretty much guaranteed to be present in every single
+# physical machine based on q35, more specifically:
+#
+# 00:01.0 VGA compatible controller
+# 00:19.0 Ethernet controller
+# 00:1a.* USB controller (#2)
+# 00:1b.0 Audio device
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 00:1d.* USB Controller (#1)
+# 00:1e.0 PCI bridge (legacy PCI bridge)
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We add four PCI Express Root Ports, all sharing the same
+# slot on the PCI Express Root Bus. These ports support
+# hotplug.
+
+[device "ich9-pcie-port-1"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+
+[device "ich9-pcie-port-2"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "ich9-pcie-port-3"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "ich9-pcie-port-4"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+
+# PCI bridge (legacy PCI bridge)
+# =========================================================
+#
+# This bridge can be used to build an independent topology
+# for legacy PCI devices. PCI Express devices should be
+# plugged into PCI Express slots instead, so ideally there
+# will be no devices connected to this bridge.
+
+[device "ich9-pci-bridge"]
+ driver = "i82801b11-bridge"
+ bus = "pcie.0"
+ addr = "1e.0"
+
+
+# SATA storage
+# =========================================================
+#
+# An implicit SATA controller is created automatically for
+# every single q35 guest; here we create a disk, backed by
+# a qcow2 disk image on the host's filesystem, and attach
+# it to that controller so that the guest can use it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "sata-disk"]
+ driver = "ide-hd"
+ bus = "ide.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "sata-optical-disk"]
+ driver = "ide-cd"
+ bus = "ide.1"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# USB controller (#1)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-1"]
+ driver = "ich9-usb-ehci1"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.7"
+
+[device "ich9-uhci-1"]
+ driver = "ich9-usb-uhci1"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.0"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "0"
+
+[device "ich9-uhci-2"]
+ driver = "ich9-usb-uhci2"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.1"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "2"
+
+[device "ich9-uhci-3"]
+ driver = "ich9-usb-uhci3"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.2"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "4"
+
+
+# USB controller (#2)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-2"]
+ driver = "ich9-usb-ehci2"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.7"
+
+[device "ich9-uhci-4"]
+ driver = "ich9-usb-uhci4"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.0"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "0"
+
+[device "ich9-uhci-5"]
+ driver = "ich9-usb-uhci5"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.1"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "2"
+
+[device "ich9-uhci-6"]
+ driver = "ich9-usb-uhci6"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.2"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "4"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We add a Gigabit Ethernet interface to the guest; on the
+# host side, we take advantage of user networking so that
+# the QEMU process doesn't require any additional
+# privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "e1000"
+ netdev = "hostnet"
+ bus = "pcie.0"
+ addr = "19.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We use stdvga instead of Cirrus as it supports more video
+# modes and is closer to what actual hardware looks like.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "VGA"
+ bus = "pcie.0"
+ addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# The sound card is a legacy PCI device that is plugged
+# directly into the PCI Express Root Bus.
+
+[device "ich9-hda-audio"]
+ driver = "ich9-intel-hda"
+ bus = "pcie.0"
+ addr = "1b.0"
+
+[device "ich9-hda-duplex"]
+ driver = "hda-duplex"
+ bus = "ich9-hda-audio.0"
+ cad = "0"
diff --git a/docs/q35-virtio-graphical.cfg b/docs/q35-virtio-graphical.cfg
new file mode 100644
index 0000000..28bde2f
--- /dev/null
+++ b/docs/q35-virtio-graphical.cfg
@@ -0,0 +1,248 @@
+# q35 - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-virtio-graphical.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:01.0 VGA compatible controller
+# 00:1b.0 Audio device
+# 00.1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+# 03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB tablet so that graphical guests can be controlled
+# appropriately. A USB keyboard is not needed, as q35
+# guests get a PS/2 one added automatically.
+
+[device "usb"]
+ driver = "nec-usb-xhci"
+ bus = "pcie.3"
+ addr = "00.0"
+
+[device "tablet"]
+ driver = "usb-tablet"
+ bus = "usb.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We plug the QXL video card directly into the PCI Express
+# Root Bus as it is a legacy PCI device; this way, we can
+# reduce the number of PCI Express controllers in the
+# guest.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "qxl-vga"
+ bus = "pcie.0"
+ addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# Like the video card, the sound card is a legacy PCI
+# device and as such can be plugged directly into the PCI
+# Express Root Bus.
+
+[device "sound"]
+ driver = "ich9-intel-hda"
+ bus = "pcie.0"
+ addr = "1b.0"
+
+[device "duplex"]
+ driver = "hda-duplex"
+ bus = "sound.0"
+ cad = "0"
diff --git a/docs/q35-virtio-serial.cfg b/docs/q35-virtio-serial.cfg
new file mode 100644
index 0000000..c33c9cc
--- /dev/null
+++ b/docs/q35-virtio-serial.cfg
@@ -0,0 +1,193 @@
+# q35 - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-virtio-serial.cfg \
+# -display none -serial mon:stdio
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00.1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
diff --git a/exec.c b/exec.c
index 3adf2b1..785d20f 100644
--- a/exec.c
+++ b/exec.c
@@ -45,6 +45,12 @@
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
@@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
}
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
+}
+
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
@@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
rcu_read_unlock();
return ret;
}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, none-0 on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+ /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+ * freeing the page.
+ */
+ ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+ } else {
+ /* Huge page case - unfortunately it can't do DONTNEED, but
+ * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+ * huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+#endif
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ }
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->used_length);
+ }
+
+err:
+ return ret;
+}
+
#endif
diff --git a/hmp.c b/hmp.c
index 83e287e..7b44e64 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
+ int ret;
blk = blk_by_name(device);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
if (bs) {
- blk = local_blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = local_blk = blk_new(0, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, &err);
+ if (ret < 0) {
+ goto fail;
+ }
} else {
goto fail;
}
@@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
+ /*
+ * Notably absent: Proper permission management. This is sad, but it seems
+ * almost impossible to achieve without changing the semantics and thereby
+ * limiting the use cases of the qemu-io HMP command.
+ *
+ * In an ideal world we would unconditionally create a new BlockBackend for
+ * qemuio_command(), but we have commands like 'reopen' and want them to
+ * take effect on the exact BlockBackend whose name the user passed instead
+ * of just on a temporary copy of it.
+ *
+ * Another problem is that deleting the temporary BlockBackend involves
+ * draining all requests on it first, but some qemu-iotests cases want to
+ * issue multiple aio_read/write requests and expect them to complete in
+ * the background while the monitor has already returned.
+ *
+ * This is also what prevents us from saving the original permissions and
+ * restoring them later: We can't revoke permissions until all requests
+ * have completed, and we don't know when that is nor can we really let
+ * anything else run before we have revoken them to avoid race conditions.
+ *
+ * What happens now is that command() in qemu-io-cmds.c can extend the
+ * permissions if necessary for the qemu-io command. And they simply stay
+ * extended, possibly resulting in a read-only guest device keeping write
+ * permissions. Ugly, but it appears to be the lesser evil.
+ */
qemuio_command(blk, command);
aio_context_release(aio_context);
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 2369b91..f22a3c3 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -13,7 +13,9 @@
#include "qemu/osdep.h"
#include "9p.h"
+#include "9p-local.h"
#include "9p-xattr.h"
+#include "9p-util.h"
#include "fsdev/qemu-fsdev.h" /* local_ops */
#include <arpa/inet.h>
#include <pwd.h>
@@ -43,40 +45,62 @@
#define BTRFS_SUPER_MAGIC 0x9123683E
#endif
-#define VIRTFS_META_DIR ".virtfs_metadata"
+typedef struct {
+ int mountfd;
+} LocalData;
-static char *local_mapped_attr_path(FsContext *ctx, const char *path)
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+ mode_t mode)
{
- int dirlen;
- const char *name = strrchr(path, '/');
- if (name) {
- dirlen = name - path;
- ++name;
- } else {
- name = path;
- dirlen = 0;
+ LocalData *data = fs_ctx->private;
+
+ /* All paths are relative to the path data->mountfd points to */
+ while (*path == '/') {
+ path++;
}
- return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root,
- dirlen, path, VIRTFS_META_DIR, name);
+
+ return relative_openat_nofollow(data->mountfd, path, flags, mode);
+}
+
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path)
+{
+ return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0);
+}
+
+static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd,
+ const char *npath)
+{
+ int serrno = errno;
+ renameat(odirfd, opath, ndirfd, npath);
+ errno = serrno;
}
-static FILE *local_fopen(const char *path, const char *mode)
+static void unlinkat_preserve_errno(int dirfd, const char *path, int flags)
+{
+ int serrno = errno;
+ unlinkat(dirfd, path, flags);
+ errno = serrno;
+}
+
+#define VIRTFS_META_DIR ".virtfs_metadata"
+
+static FILE *local_fopenat(int dirfd, const char *name, const char *mode)
{
int fd, o_mode = 0;
FILE *fp;
- int flags = O_NOFOLLOW;
+ int flags;
/*
* only supports two modes
*/
if (mode[0] == 'r') {
- flags |= O_RDONLY;
+ flags = O_RDONLY;
} else if (mode[0] == 'w') {
- flags |= O_WRONLY | O_TRUNC | O_CREAT;
+ flags = O_WRONLY | O_TRUNC | O_CREAT;
o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
} else {
return NULL;
}
- fd = open(path, flags, o_mode);
+ fd = openat_file(dirfd, name, flags, o_mode);
if (fd == -1) {
return NULL;
}
@@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode)
}
#define ATTR_MAX 100
-static void local_mapped_file_attr(FsContext *ctx, const char *path,
+static void local_mapped_file_attr(int dirfd, const char *name,
struct stat *stbuf)
{
FILE *fp;
char buf[ATTR_MAX];
- char *attr_path;
+ int map_dirfd;
- attr_path = local_mapped_attr_path(ctx, path);
- fp = local_fopen(attr_path, "r");
- g_free(attr_path);
+ map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+ if (map_dirfd == -1) {
+ return;
+ }
+
+ fp = local_fopenat(map_dirfd, name, "r");
+ close_preserve_errno(map_dirfd);
if (!fp) {
return;
}
@@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path,
static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
{
- int err;
- char *buffer;
- char *path = fs_path->data;
+ int err = -1;
+ char *dirpath = g_path_get_dirname(fs_path->data);
+ char *name = g_path_get_basename(fs_path->data);
+ int dirfd;
+
+ dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
- buffer = rpath(fs_ctx, path);
- err = lstat(buffer, stbuf);
+ err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW);
if (err) {
goto err_out;
}
@@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
gid_t tmp_gid;
mode_t tmp_mode;
dev_t tmp_dev;
- if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+
+ if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid,
+ sizeof(uid_t)) > 0) {
stbuf->st_uid = le32_to_cpu(tmp_uid);
}
- if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+ if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid,
+ sizeof(gid_t)) > 0) {
stbuf->st_gid = le32_to_cpu(tmp_gid);
}
- if (getxattr(buffer, "user.virtfs.mode",
- &tmp_mode, sizeof(mode_t)) > 0) {
+ if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode,
+ sizeof(mode_t)) > 0) {
stbuf->st_mode = le32_to_cpu(tmp_mode);
}
- if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+ if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev,
+ sizeof(dev_t)) > 0) {
stbuf->st_rdev = le64_to_cpu(tmp_dev);
}
} else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- local_mapped_file_attr(fs_ctx, path, stbuf);
+ local_mapped_file_attr(dirfd, name, stbuf);
}
err_out:
- g_free(buffer);
- return err;
-}
-
-static int local_create_mapped_attr_dir(FsContext *ctx, const char *path)
-{
- int err;
- char *attr_dir;
- char *tmp_path = g_strdup(path);
-
- attr_dir = g_strdup_printf("%s/%s/%s",
- ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR);
-
- err = mkdir(attr_dir, 0700);
- if (err < 0 && errno == EEXIST) {
- err = 0;
- }
- g_free(attr_dir);
- g_free(tmp_path);
+ close_preserve_errno(dirfd);
+out:
+ g_free(name);
+ g_free(dirpath);
return err;
}
-static int local_set_mapped_file_attr(FsContext *ctx,
- const char *path, FsCred *credp)
+static int local_set_mapped_file_attrat(int dirfd, const char *name,
+ FsCred *credp)
{
FILE *fp;
- int ret = 0;
+ int ret;
char buf[ATTR_MAX];
- char *attr_path;
int uid = -1, gid = -1, mode = -1, rdev = -1;
+ int map_dirfd;
+
+ ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ return -1;
+ }
- attr_path = local_mapped_attr_path(ctx, path);
- fp = local_fopen(attr_path, "r");
+ map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+ if (map_dirfd == -1) {
+ return -1;
+ }
+
+ fp = local_fopenat(map_dirfd, name, "r");
if (!fp) {
- goto create_map_file;
+ if (errno == ENOENT) {
+ goto update_map_file;
+ } else {
+ close_preserve_errno(map_dirfd);
+ return -1;
+ }
}
memset(buf, 0, ATTR_MAX);
while (fgets(buf, ATTR_MAX, fp)) {
if (!strncmp(buf, "virtfs.uid", 10)) {
- uid = atoi(buf+11);
+ uid = atoi(buf + 11);
} else if (!strncmp(buf, "virtfs.gid", 10)) {
- gid = atoi(buf+11);
+ gid = atoi(buf + 11);
} else if (!strncmp(buf, "virtfs.mode", 11)) {
- mode = atoi(buf+12);
+ mode = atoi(buf + 12);
} else if (!strncmp(buf, "virtfs.rdev", 11)) {
- rdev = atoi(buf+12);
+ rdev = atoi(buf + 12);
}
memset(buf, 0, ATTR_MAX);
}
fclose(fp);
- goto update_map_file;
-
-create_map_file:
- ret = local_create_mapped_attr_dir(ctx, path);
- if (ret < 0) {
- goto err_out;
- }
update_map_file:
- fp = local_fopen(attr_path, "w");
+ fp = local_fopenat(map_dirfd, name, "w");
+ close_preserve_errno(map_dirfd);
if (!fp) {
- ret = -1;
- goto err_out;
+ return -1;
}
if (credp->fc_uid != -1) {
@@ -230,7 +259,6 @@ update_map_file:
rdev = credp->fc_rdev;
}
-
if (uid != -1) {
fprintf(fp, "virtfs.uid=%d\n", uid);
}
@@ -245,39 +273,71 @@ update_map_file:
}
fclose(fp);
-err_out:
- g_free(attr_path);
+ return 0;
+}
+
+static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
+{
+ int fd, ret;
+
+ /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
+ * Unfortunately, the linux kernel doesn't implement it yet. As an
+ * alternative, let's open the file and use fchmod() instead. This
+ * may fail depending on the permissions of the file, but it is the
+ * best we can do to avoid TOCTTOU. We first try to open read-only
+ * in case name points to a directory. If that fails, we try write-only
+ * in case name doesn't point to a directory.
+ */
+ fd = openat_file(dirfd, name, O_RDONLY, 0);
+ if (fd == -1) {
+ /* In case the file is writable-only and isn't a directory. */
+ if (errno == EACCES) {
+ fd = openat_file(dirfd, name, O_WRONLY, 0);
+ }
+ if (fd == -1 && errno == EISDIR) {
+ errno = EACCES;
+ }
+ }
+ if (fd == -1) {
+ return -1;
+ }
+ ret = fchmod(fd, mode);
+ close_preserve_errno(fd);
return ret;
}
-static int local_set_xattr(const char *path, FsCred *credp)
+static int local_set_xattrat(int dirfd, const char *path, FsCred *credp)
{
int err;
if (credp->fc_uid != -1) {
uint32_t tmp_uid = cpu_to_le32(credp->fc_uid);
- err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0);
+ err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid,
+ sizeof(uid_t), 0);
if (err) {
return err;
}
}
if (credp->fc_gid != -1) {
uint32_t tmp_gid = cpu_to_le32(credp->fc_gid);
- err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0);
+ err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid,
+ sizeof(gid_t), 0);
if (err) {
return err;
}
}
if (credp->fc_mode != -1) {
uint32_t tmp_mode = cpu_to_le32(credp->fc_mode);
- err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0);
+ err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode,
+ sizeof(mode_t), 0);
if (err) {
return err;
}
}
if (credp->fc_rdev != -1) {
uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev);
- err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0);
+ err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev,
+ sizeof(dev_t), 0);
if (err) {
return err;
}
@@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp)
return 0;
}
-static int local_post_create_passthrough(FsContext *fs_ctx, const char *path,
- FsCred *credp)
+static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd,
+ const char *name, FsCred *credp)
{
- char *buffer;
-
- buffer = rpath(fs_ctx, path);
- if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) {
+ if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+ AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) {
/*
* If we fail to change ownership and if we are
* using security model none. Ignore the error
*/
if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
- goto err;
+ return -1;
}
}
- if (chmod(buffer, credp->fc_mode & 07777) < 0) {
- goto err;
- }
-
- g_free(buffer);
- return 0;
-err:
- g_free(buffer);
- return -1;
+ return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777);
}
static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path,
char *buf, size_t bufsz)
{
ssize_t tsize = -1;
- char *buffer;
- char *path = fs_path->data;
if ((fs_ctx->export_flags & V9FS_SM_MAPPED) ||
(fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
int fd;
- buffer = rpath(fs_ctx, path);
- fd = open(buffer, O_RDONLY | O_NOFOLLOW);
- g_free(buffer);
+
+ fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0);
if (fd == -1) {
return -1;
}
do {
tsize = read(fd, (void *)buf, bufsz);
} while (tsize == -1 && errno == EINTR);
- close(fd);
+ close_preserve_errno(fd);
} else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
(fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- tsize = readlink(buffer, buf, bufsz);
- g_free(buffer);
+ char *dirpath = g_path_get_dirname(fs_path->data);
+ char *name = g_path_get_basename(fs_path->data);
+ int dirfd;
+
+ dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
+
+ tsize = readlinkat(dirfd, name, buf, bufsz);
+ close_preserve_errno(dirfd);
+ out:
+ g_free(name);
+ g_free(dirpath);
}
return tsize;
}
@@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs)
static int local_open(FsContext *ctx, V9fsPath *fs_path,
int flags, V9fsFidOpenState *fs)
{
- char *buffer;
- char *path = fs_path->data;
+ int fd;
- buffer = rpath(ctx, path);
- fs->fd = open(buffer, flags | O_NOFOLLOW);
- g_free(buffer);
+ fd = local_open_nofollow(ctx, fs_path->data, flags, 0);
+ if (fd == -1) {
+ return -1;
+ }
+ fs->fd = fd;
return fs->fd;
}
static int local_opendir(FsContext *ctx,
V9fsPath *fs_path, V9fsFidOpenState *fs)
{
- char *buffer;
- char *path = fs_path->data;
+ int dirfd;
+ DIR *stream;
+
+ dirfd = local_opendir_nofollow(ctx, fs_path->data);
+ if (dirfd == -1) {
+ return -1;
+ }
- buffer = rpath(ctx, path);
- fs->dir.stream = opendir(buffer);
- g_free(buffer);
- if (!fs->dir.stream) {
+ stream = fdopendir(dirfd);
+ if (!stream) {
return -1;
}
+ fs->dir.stream = stream;
return 0;
}
@@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
{
- char *buffer;
+ char *dirpath = g_path_get_dirname(fs_path->data);
+ char *name = g_path_get_basename(fs_path->data);
int ret = -1;
- char *path = fs_path->data;
+ int dirfd;
+
+ dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
- buffer = rpath(fs_ctx, path);
- ret = local_set_xattr(buffer, credp);
- g_free(buffer);
+ ret = local_set_xattrat(dirfd, name, credp);
} else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- return local_set_mapped_file_attr(fs_ctx, path, credp);
- } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
- (fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- ret = chmod(buffer, credp->fc_mode);
- g_free(buffer);
+ ret = local_set_mapped_file_attrat(dirfd, name, credp);
+ } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+ fs_ctx->export_flags & V9FS_SM_NONE) {
+ ret = fchmodat_nofollow(dirfd, name, credp->fc_mode);
}
+ close_preserve_errno(dirfd);
+
+out:
+ g_free(dirpath);
+ g_free(name);
return ret;
}
static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
const char *name, FsCred *credp)
{
- char *path;
int err = -1;
- int serrno = 0;
- V9fsString fullname;
- char *buffer = NULL;
+ int dirfd;
- v9fs_string_init(&fullname);
- v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
- path = fullname.data;
+ dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+ if (dirfd == -1) {
+ return -1;
+ }
- /* Determine the security model */
- if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
- buffer = rpath(fs_ctx, path);
- err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+ fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+ err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0);
if (err == -1) {
goto out;
}
- err = local_set_xattr(buffer, credp);
- if (err == -1) {
- serrno = errno;
- goto err_end;
- }
- } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- buffer = rpath(fs_ctx, path);
- err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
- if (err == -1) {
- goto out;
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+ err = local_set_xattrat(dirfd, name, credp);
+ } else {
+ err = local_set_mapped_file_attrat(dirfd, name, credp);
}
- err = local_set_mapped_file_attr(fs_ctx, path, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
- } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
- (fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- err = mknod(buffer, credp->fc_mode, credp->fc_rdev);
+ } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+ fs_ctx->export_flags & V9FS_SM_NONE) {
+ err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
if (err == -1) {
goto out;
}
- err = local_post_create_passthrough(fs_ctx, path, credp);
+ err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
}
goto out;
err_end:
- remove(buffer);
- errno = serrno;
+ unlinkat_preserve_errno(dirfd, name, 0);
out:
- g_free(buffer);
- v9fs_string_free(&fullname);
+ close_preserve_errno(dirfd);
return err;
}
static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path,
const char *name, FsCred *credp)
{
- char *path;
int err = -1;
- int serrno = 0;
- V9fsString fullname;
- char *buffer = NULL;
+ int dirfd;
- v9fs_string_init(&fullname);
- v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
- path = fullname.data;
+ dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+ if (dirfd == -1) {
+ return -1;
+ }
- /* Determine the security model */
- if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
- buffer = rpath(fs_ctx, path);
- err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+ fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+ err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS);
if (err == -1) {
goto out;
}
- credp->fc_mode = credp->fc_mode|S_IFDIR;
- err = local_set_xattr(buffer, credp);
- if (err == -1) {
- serrno = errno;
- goto err_end;
- }
- } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- buffer = rpath(fs_ctx, path);
- err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
- if (err == -1) {
- goto out;
+ credp->fc_mode = credp->fc_mode | S_IFDIR;
+
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+ err = local_set_xattrat(dirfd, name, credp);
+ } else {
+ err = local_set_mapped_file_attrat(dirfd, name, credp);
}
- credp->fc_mode = credp->fc_mode|S_IFDIR;
- err = local_set_mapped_file_attr(fs_ctx, path, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
- } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
- (fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- err = mkdir(buffer, credp->fc_mode);
+ } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+ fs_ctx->export_flags & V9FS_SM_NONE) {
+ err = mkdirat(dirfd, name, credp->fc_mode);
if (err == -1) {
goto out;
}
- err = local_post_create_passthrough(fs_ctx, path, credp);
+ err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
}
goto out;
err_end:
- remove(buffer);
- errno = serrno;
+ unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR);
out:
- g_free(buffer);
- v9fs_string_free(&fullname);
+ close_preserve_errno(dirfd);
return err;
}
@@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
int flags, FsCred *credp, V9fsFidOpenState *fs)
{
- char *path;
int fd = -1;
int err = -1;
- int serrno = 0;
- V9fsString fullname;
- char *buffer = NULL;
+ int dirfd;
/*
* Mark all the open to not follow symlinks
*/
flags |= O_NOFOLLOW;
- v9fs_string_init(&fullname);
- v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
- path = fullname.data;
+ dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+ if (dirfd == -1) {
+ return -1;
+ }
/* Determine the security model */
- if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
- buffer = rpath(fs_ctx, path);
- fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+ fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+ fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS);
if (fd == -1) {
- err = fd;
goto out;
}
credp->fc_mode = credp->fc_mode|S_IFREG;
- /* Set cleint credentials in xattr */
- err = local_set_xattr(buffer, credp);
- if (err == -1) {
- serrno = errno;
- goto err_end;
- }
- } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- buffer = rpath(fs_ctx, path);
- fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
- if (fd == -1) {
- err = fd;
- goto out;
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+ /* Set cleint credentials in xattr */
+ err = local_set_xattrat(dirfd, name, credp);
+ } else {
+ err = local_set_mapped_file_attrat(dirfd, name, credp);
}
- credp->fc_mode = credp->fc_mode|S_IFREG;
- /* Set client credentials in .virtfs_metadata directory files */
- err = local_set_mapped_file_attr(fs_ctx, path, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
} else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
(fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- fd = open(buffer, flags, credp->fc_mode);
+ fd = openat_file(dirfd, name, flags, credp->fc_mode);
if (fd == -1) {
- err = fd;
goto out;
}
- err = local_post_create_passthrough(fs_ctx, path, credp);
+ err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
}
@@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
goto out;
err_end:
- close(fd);
- remove(buffer);
- errno = serrno;
+ unlinkat_preserve_errno(dirfd, name,
+ flags & O_DIRECTORY ? AT_REMOVEDIR : 0);
+ close_preserve_errno(fd);
out:
- g_free(buffer);
- v9fs_string_free(&fullname);
+ close_preserve_errno(dirfd);
return err;
}
@@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
V9fsPath *dir_path, const char *name, FsCred *credp)
{
int err = -1;
- int serrno = 0;
- char *newpath;
- V9fsString fullname;
- char *buffer = NULL;
+ int dirfd;
- v9fs_string_init(&fullname);
- v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
- newpath = fullname.data;
+ dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+ if (dirfd == -1) {
+ return -1;
+ }
/* Determine the security model */
- if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+ fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
int fd;
ssize_t oldpath_size, write_size;
- buffer = rpath(fs_ctx, newpath);
- fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
+
+ fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR,
+ SM_LOCAL_MODE_BITS);
if (fd == -1) {
- err = fd;
goto out;
}
/* Write the oldpath (target) to the file. */
@@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
do {
write_size = write(fd, (void *)oldpath, oldpath_size);
} while (write_size == -1 && errno == EINTR);
+ close_preserve_errno(fd);
if (write_size != oldpath_size) {
- serrno = errno;
- close(fd);
- err = -1;
goto err_end;
}
- close(fd);
/* Set cleint credentials in symlink's xattr */
- credp->fc_mode = credp->fc_mode|S_IFLNK;
- err = local_set_xattr(buffer, credp);
- if (err == -1) {
- serrno = errno;
- goto err_end;
- }
- } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- int fd;
- ssize_t oldpath_size, write_size;
- buffer = rpath(fs_ctx, newpath);
- fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
- if (fd == -1) {
- err = fd;
- goto out;
- }
- /* Write the oldpath (target) to the file. */
- oldpath_size = strlen(oldpath);
- do {
- write_size = write(fd, (void *)oldpath, oldpath_size);
- } while (write_size == -1 && errno == EINTR);
+ credp->fc_mode = credp->fc_mode | S_IFLNK;
- if (write_size != oldpath_size) {
- serrno = errno;
- close(fd);
- err = -1;
- goto err_end;
+ if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+ err = local_set_xattrat(dirfd, name, credp);
+ } else {
+ err = local_set_mapped_file_attrat(dirfd, name, credp);
}
- close(fd);
- /* Set cleint credentials in symlink's xattr */
- credp->fc_mode = credp->fc_mode|S_IFLNK;
- err = local_set_mapped_file_attr(fs_ctx, newpath, credp);
if (err == -1) {
- serrno = errno;
goto err_end;
}
- } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
- (fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, newpath);
- err = symlink(oldpath, buffer);
+ } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+ fs_ctx->export_flags & V9FS_SM_NONE) {
+ err = symlinkat(oldpath, dirfd, name);
if (err) {
goto out;
}
- err = lchown(buffer, credp->fc_uid, credp->fc_gid);
+ err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+ AT_SYMLINK_NOFOLLOW);
if (err == -1) {
/*
* If we fail to change ownership and if we are
* using security model none. Ignore the error
*/
if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
- serrno = errno;
goto err_end;
- } else
+ } else {
err = 0;
+ }
}
}
goto out;
err_end:
- remove(buffer);
- errno = serrno;
+ unlinkat_preserve_errno(dirfd, name, 0);
out:
- g_free(buffer);
- v9fs_string_free(&fullname);
+ close_preserve_errno(dirfd);
return err;
}
static int local_link(FsContext *ctx, V9fsPath *oldpath,
V9fsPath *dirpath, const char *name)
{
- int ret;
- V9fsString newpath;
- char *buffer, *buffer1;
+ char *odirpath = g_path_get_dirname(oldpath->data);
+ char *oname = g_path_get_basename(oldpath->data);
+ int ret = -1;
+ int odirfd, ndirfd;
+
+ odirfd = local_opendir_nofollow(ctx, odirpath);
+ if (odirfd == -1) {
+ goto out;
+ }
- v9fs_string_init(&newpath);
- v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name);
+ ndirfd = local_opendir_nofollow(ctx, dirpath->data);
+ if (ndirfd == -1) {
+ close_preserve_errno(odirfd);
+ goto out;
+ }
- buffer = rpath(ctx, oldpath->data);
- buffer1 = rpath(ctx, newpath.data);
- ret = link(buffer, buffer1);
- g_free(buffer);
- g_free(buffer1);
+ ret = linkat(odirfd, oname, ndirfd, name, 0);
+ if (ret < 0) {
+ goto out_close;
+ }
/* now link the virtfs_metadata files */
- if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
- /* Link the .virtfs_metadata files. Create the metada directory */
- ret = local_create_mapped_attr_dir(ctx, newpath.data);
- if (ret < 0) {
- goto err_out;
+ if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+ int omap_dirfd, nmap_dirfd;
+
+ ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ goto err_undo_link;
}
- buffer = local_mapped_attr_path(ctx, oldpath->data);
- buffer1 = local_mapped_attr_path(ctx, newpath.data);
- ret = link(buffer, buffer1);
- g_free(buffer);
- g_free(buffer1);
+
+ omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+ if (omap_dirfd == -1) {
+ goto err;
+ }
+
+ nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+ if (nmap_dirfd == -1) {
+ close_preserve_errno(omap_dirfd);
+ goto err;
+ }
+
+ ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0);
+ close_preserve_errno(nmap_dirfd);
+ close_preserve_errno(omap_dirfd);
if (ret < 0 && errno != ENOENT) {
- goto err_out;
+ goto err_undo_link;
}
- }
-err_out:
- v9fs_string_free(&newpath);
- return ret;
-}
-static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
-{
- char *buffer;
- int ret;
- char *path = fs_path->data;
+ ret = 0;
+ }
+ goto out_close;
- buffer = rpath(ctx, path);
- ret = truncate(buffer, size);
- g_free(buffer);
+err:
+ ret = -1;
+err_undo_link:
+ unlinkat_preserve_errno(ndirfd, name, 0);
+out_close:
+ close_preserve_errno(ndirfd);
+ close_preserve_errno(odirfd);
+out:
+ g_free(oname);
+ g_free(odirpath);
return ret;
}
-static int local_rename(FsContext *ctx, const char *oldpath,
- const char *newpath)
+static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
{
- int err;
- char *buffer, *buffer1;
+ int fd, ret;
- if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- err = local_create_mapped_attr_dir(ctx, newpath);
- if (err < 0) {
- return err;
- }
- /* rename the .virtfs_metadata files */
- buffer = local_mapped_attr_path(ctx, oldpath);
- buffer1 = local_mapped_attr_path(ctx, newpath);
- err = rename(buffer, buffer1);
- g_free(buffer);
- g_free(buffer1);
- if (err < 0 && errno != ENOENT) {
- return err;
- }
+ fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0);
+ if (fd == -1) {
+ return -1;
}
-
- buffer = rpath(ctx, oldpath);
- buffer1 = rpath(ctx, newpath);
- err = rename(buffer, buffer1);
- g_free(buffer);
- g_free(buffer1);
- return err;
+ ret = ftruncate(fd, size);
+ close_preserve_errno(fd);
+ return ret;
}
static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
{
- char *buffer;
+ char *dirpath = g_path_get_dirname(fs_path->data);
+ char *name = g_path_get_basename(fs_path->data);
int ret = -1;
- char *path = fs_path->data;
+ int dirfd;
+
+ dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
if ((credp->fc_uid == -1 && credp->fc_gid == -1) ||
(fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
(fs_ctx->export_flags & V9FS_SM_NONE)) {
- buffer = rpath(fs_ctx, path);
- ret = lchown(buffer, credp->fc_uid, credp->fc_gid);
- g_free(buffer);
+ ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+ AT_SYMLINK_NOFOLLOW);
} else if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
- buffer = rpath(fs_ctx, path);
- ret = local_set_xattr(buffer, credp);
- g_free(buffer);
+ ret = local_set_xattrat(dirfd, name, credp);
} else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- return local_set_mapped_file_attr(fs_ctx, path, credp);
+ ret = local_set_mapped_file_attrat(dirfd, name, credp);
}
+
+ close_preserve_errno(dirfd);
+out:
+ g_free(name);
+ g_free(dirpath);
return ret;
}
static int local_utimensat(FsContext *s, V9fsPath *fs_path,
const struct timespec *buf)
{
- char *buffer;
- int ret;
- char *path = fs_path->data;
+ char *dirpath = g_path_get_dirname(fs_path->data);
+ char *name = g_path_get_basename(fs_path->data);
+ int dirfd, ret = -1;
+
+ dirfd = local_opendir_nofollow(s, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
- buffer = rpath(s, path);
- ret = qemu_utimens(buffer, buf);
- g_free(buffer);
+ ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW);
+ close_preserve_errno(dirfd);
+out:
+ g_free(dirpath);
+ g_free(name);
return ret;
}
-static int local_remove(FsContext *ctx, const char *path)
+static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name,
+ int flags)
{
- int err;
- struct stat stbuf;
- char *buffer;
+ int ret = -1;
if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- buffer = rpath(ctx, path);
- err = lstat(buffer, &stbuf);
- g_free(buffer);
- if (err) {
- goto err_out;
- }
- /*
- * If directory remove .virtfs_metadata contained in the
- * directory
- */
- if (S_ISDIR(stbuf.st_mode)) {
- buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
- path, VIRTFS_META_DIR);
- err = remove(buffer);
- g_free(buffer);
- if (err < 0 && errno != ENOENT) {
+ int map_dirfd;
+
+ if (flags == AT_REMOVEDIR) {
+ int fd;
+
+ fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH);
+ if (fd == -1) {
+ goto err_out;
+ }
+ /*
+ * If directory remove .virtfs_metadata contained in the
+ * directory
+ */
+ ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR);
+ close_preserve_errno(fd);
+ if (ret < 0 && errno != ENOENT) {
/*
* We didn't had the .virtfs_metadata file. May be file created
* in non-mapped mode ?. Ignore ENOENT.
@@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path)
}
/*
* Now remove the name from parent directory
- * .virtfs_metadata directory
+ * .virtfs_metadata directory.
*/
- buffer = local_mapped_attr_path(ctx, path);
- err = remove(buffer);
- g_free(buffer);
- if (err < 0 && errno != ENOENT) {
+ map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+ ret = unlinkat(map_dirfd, name, 0);
+ close_preserve_errno(map_dirfd);
+ if (ret < 0 && errno != ENOENT) {
/*
* We didn't had the .virtfs_metadata file. May be file created
* in non-mapped mode ?. Ignore ENOENT.
@@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path)
}
}
- buffer = rpath(ctx, path);
- err = remove(buffer);
- g_free(buffer);
+ ret = unlinkat(dirfd, name, flags);
+err_out:
+ return ret;
+}
+
+static int local_remove(FsContext *ctx, const char *path)
+{
+ struct stat stbuf;
+ char *dirpath = g_path_get_dirname(path);
+ char *name = g_path_get_basename(path);
+ int flags = 0;
+ int dirfd;
+ int err = -1;
+
+ dirfd = local_opendir_nofollow(ctx, dirpath);
+ if (dirfd) {
+ goto out;
+ }
+
+ if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) {
+ goto err_out;
+ }
+
+ if (S_ISDIR(stbuf.st_mode)) {
+ flags |= AT_REMOVEDIR;
+ }
+
+ err = local_unlinkat_common(ctx, dirfd, name, flags);
err_out:
+ close_preserve_errno(dirfd);
+out:
+ g_free(name);
+ g_free(dirpath);
return err;
}
@@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type,
static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf)
{
- char *buffer;
- int ret;
- char *path = fs_path->data;
+ int fd, ret;
- buffer = rpath(s, path);
- ret = statfs(buffer, stbuf);
- g_free(buffer);
+ fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0);
+ ret = fstatfs(fd, stbuf);
+ close_preserve_errno(fd);
return ret;
}
@@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir,
const char *new_name)
{
int ret;
- V9fsString old_full_name, new_full_name;
+ int odirfd, ndirfd;
- v9fs_string_init(&old_full_name);
- v9fs_string_init(&new_full_name);
+ odirfd = local_opendir_nofollow(ctx, olddir->data);
+ if (odirfd == -1) {
+ return -1;
+ }
- v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name);
- v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name);
+ ndirfd = local_opendir_nofollow(ctx, newdir->data);
+ if (ndirfd == -1) {
+ close_preserve_errno(odirfd);
+ return -1;
+ }
- ret = local_rename(ctx, old_full_name.data, new_full_name.data);
- v9fs_string_free(&old_full_name);
- v9fs_string_free(&new_full_name);
+ ret = renameat(odirfd, old_name, ndirfd, new_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+ int omap_dirfd, nmap_dirfd;
+
+ ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ goto err_undo_rename;
+ }
+
+ omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+ if (omap_dirfd == -1) {
+ goto err;
+ }
+
+ nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+ if (nmap_dirfd == -1) {
+ close_preserve_errno(omap_dirfd);
+ goto err;
+ }
+
+ /* rename the .virtfs_metadata files */
+ ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name);
+ close_preserve_errno(nmap_dirfd);
+ close_preserve_errno(omap_dirfd);
+ if (ret < 0 && errno != ENOENT) {
+ goto err_undo_rename;
+ }
+
+ ret = 0;
+ }
+ goto out;
+
+err:
+ ret = -1;
+err_undo_rename:
+ renameat_preserve_errno(ndirfd, new_name, odirfd, old_name);
+out:
+ close_preserve_errno(ndirfd);
+ close_preserve_errno(odirfd);
return ret;
}
+static void v9fs_path_init_dirname(V9fsPath *path, const char *str)
+{
+ path->data = g_path_get_dirname(str);
+ path->size = strlen(path->data) + 1;
+}
+
+static int local_rename(FsContext *ctx, const char *oldpath,
+ const char *newpath)
+{
+ int err;
+ char *oname = g_path_get_basename(oldpath);
+ char *nname = g_path_get_basename(newpath);
+ V9fsPath olddir, newdir;
+
+ v9fs_path_init_dirname(&olddir, oldpath);
+ v9fs_path_init_dirname(&newdir, newpath);
+
+ err = local_renameat(ctx, &olddir, oname, &newdir, nname);
+
+ v9fs_path_free(&newdir);
+ v9fs_path_free(&olddir);
+ g_free(nname);
+ g_free(oname);
+
+ return err;
+}
+
static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
const char *name, int flags)
{
int ret;
- V9fsString fullname;
- char *buffer;
-
- v9fs_string_init(&fullname);
+ int dirfd;
- v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name);
- if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- if (flags == AT_REMOVEDIR) {
- /*
- * If directory remove .virtfs_metadata contained in the
- * directory
- */
- buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
- fullname.data, VIRTFS_META_DIR);
- ret = remove(buffer);
- g_free(buffer);
- if (ret < 0 && errno != ENOENT) {
- /*
- * We didn't had the .virtfs_metadata file. May be file created
- * in non-mapped mode ?. Ignore ENOENT.
- */
- goto err_out;
- }
- }
- /*
- * Now remove the name from parent directory
- * .virtfs_metadata directory.
- */
- buffer = local_mapped_attr_path(ctx, fullname.data);
- ret = remove(buffer);
- g_free(buffer);
- if (ret < 0 && errno != ENOENT) {
- /*
- * We didn't had the .virtfs_metadata file. May be file created
- * in non-mapped mode ?. Ignore ENOENT.
- */
- goto err_out;
- }
+ dirfd = local_opendir_nofollow(ctx, dir->data);
+ if (dirfd == -1) {
+ return -1;
}
- /* Remove the name finally */
- buffer = rpath(ctx, fullname.data);
- ret = remove(buffer);
- g_free(buffer);
-err_out:
- v9fs_string_free(&fullname);
+ ret = local_unlinkat_common(ctx, dirfd, name, flags);
+ close_preserve_errno(dirfd);
return ret;
}
@@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path,
static int local_init(FsContext *ctx)
{
- int err = 0;
struct statfs stbuf;
+ LocalData *data = g_malloc(sizeof(*data));
+
+ data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY);
+ if (data->mountfd == -1) {
+ goto err;
+ }
+
+#ifdef FS_IOC_GETVERSION
+ /*
+ * use ioc_getversion only if the ioctl is definied
+ */
+ if (fstatfs(data->mountfd, &stbuf) < 0) {
+ close_preserve_errno(data->mountfd);
+ goto err;
+ }
+ switch (stbuf.f_type) {
+ case EXT2_SUPER_MAGIC:
+ case BTRFS_SUPER_MAGIC:
+ case REISERFS_SUPER_MAGIC:
+ case XFS_SUPER_MAGIC:
+ ctx->exops.get_st_gen = local_ioc_getversion;
+ break;
+ }
+#endif
if (ctx->export_flags & V9FS_SM_PASSTHROUGH) {
ctx->xops = passthrough_xattr_ops;
@@ -1185,23 +1277,21 @@ static int local_init(FsContext *ctx)
ctx->xops = passthrough_xattr_ops;
}
ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT;
-#ifdef FS_IOC_GETVERSION
- /*
- * use ioc_getversion only if the iocl is definied
- */
- err = statfs(ctx->fs_root, &stbuf);
- if (!err) {
- switch (stbuf.f_type) {
- case EXT2_SUPER_MAGIC:
- case BTRFS_SUPER_MAGIC:
- case REISERFS_SUPER_MAGIC:
- case XFS_SUPER_MAGIC:
- ctx->exops.get_st_gen = local_ioc_getversion;
- break;
- }
- }
-#endif
- return err;
+
+ ctx->private = data;
+ return 0;
+
+err:
+ g_free(data);
+ return -1;
+}
+
+static void local_cleanup(FsContext *ctx)
+{
+ LocalData *data = ctx->private;
+
+ close(data->mountfd);
+ g_free(data);
}
static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
@@ -1252,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
FileOperations local_ops = {
.parse_opts = local_parse_opts,
.init = local_init,
+ .cleanup = local_cleanup,
.lstat = local_lstat,
.readlink = local_readlink,
.close = local_close,
diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h
new file mode 100644
index 0000000..32c7274
--- /dev/null
+++ b/hw/9pfs/9p-local.h
@@ -0,0 +1,20 @@
+/*
+ * 9p local backend utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ * Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_LOCAL_H
+#define QEMU_9P_LOCAL_H
+
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+ mode_t mode);
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
+
+#endif
diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c
index ec00318..bbf8906 100644
--- a/hw/9pfs/9p-posix-acl.c
+++ b/hw/9pfs/9p-posix-acl.c
@@ -25,13 +25,7 @@
static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
- char *buffer;
- ssize_t ret;
-
- buffer = rpath(ctx, path);
- ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
- g_free(buffer);
- return ret;
+ return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
}
static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
- char *buffer;
- int ret;
-
- buffer = rpath(ctx, path);
- ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
- g_free(buffer);
- return ret;
+ return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
+ flags);
}
static int mp_pacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
- char *buffer;
- buffer = rpath(ctx, path);
- ret = lremovexattr(buffer, MAP_ACL_ACCESS);
+ ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
- g_free(buffer);
return ret;
}
static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
- char *buffer;
- ssize_t ret;
-
- buffer = rpath(ctx, path);
- ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
- g_free(buffer);
- return ret;
+ return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
}
static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
- char *buffer;
- int ret;
-
- buffer = rpath(ctx, path);
- ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
- g_free(buffer);
- return ret;
+ return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
+ flags);
}
static int mp_dacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
- char *buffer;
- buffer = rpath(ctx, path);
- ret = lremovexattr(buffer, MAP_ACL_DEFAULT);
+ ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
- g_free(buffer);
return ret;
}
diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c
new file mode 100644
index 0000000..fdb4d57
--- /dev/null
+++ b/hw/9pfs/9p-util.c
@@ -0,0 +1,69 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ * Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+ mode_t mode)
+{
+ int fd;
+
+ fd = dup(dirfd);
+ if (fd == -1) {
+ return -1;
+ }
+
+ while (*path) {
+ const char *c;
+ int next_fd;
+ char *head;
+
+ /* Only relative paths without consecutive slashes */
+ assert(path[0] != '/');
+
+ head = g_strdup(path);
+ c = strchr(path, '/');
+ if (c) {
+ head[c - path] = 0;
+ next_fd = openat_dir(fd, head);
+ } else {
+ next_fd = openat_file(fd, head, flags, mode);
+ }
+ g_free(head);
+ if (next_fd == -1) {
+ close_preserve_errno(fd);
+ return -1;
+ }
+ close(fd);
+ fd = next_fd;
+
+ if (!c) {
+ break;
+ }
+ path = c + 1;
+ }
+
+ return fd;
+}
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size)
+{
+ char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+ int ret;
+
+ ret = lgetxattr(proc_path, name, value, size);
+ g_free(proc_path);
+ return ret;
+}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
new file mode 100644
index 0000000..091f3ce
--- /dev/null
+++ b/hw/9pfs/9p-util.h
@@ -0,0 +1,54 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ * Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_UTIL_H
+#define QEMU_9P_UTIL_H
+
+static inline void close_preserve_errno(int fd)
+{
+ int serrno = errno;
+ close(fd);
+ errno = serrno;
+}
+
+static inline int openat_dir(int dirfd, const char *name)
+{
+ return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH);
+}
+
+static inline int openat_file(int dirfd, const char *name, int flags,
+ mode_t mode)
+{
+ int fd, serrno, ret;
+
+ fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
+ mode);
+ if (fd == -1) {
+ return -1;
+ }
+
+ serrno = errno;
+ /* O_NONBLOCK was only needed to open the file. Let's drop it. */
+ ret = fcntl(fd, F_SETFL, flags);
+ assert(!ret);
+ errno = serrno;
+ return fd;
+}
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+ mode_t mode);
+ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
+ void *value, size_t size);
+int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
+ void *value, size_t size, int flags);
+
+#endif
diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c
index f87530c..2c90817 100644
--- a/hw/9pfs/9p-xattr-user.c
+++ b/hw/9pfs/9p-xattr-user.c
@@ -20,9 +20,6 @@
static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
- char *buffer;
- ssize_t ret;
-
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namesapce
@@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
errno = ENOATTR;
return -1;
}
- buffer = rpath(ctx, path);
- ret = lgetxattr(buffer, name, value, size);
- g_free(buffer);
- return ret;
+ return local_getxattr_nofollow(ctx, path, name, value, size);
}
static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
@@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
- char *buffer;
- int ret;
-
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namesapce
@@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
errno = EACCES;
return -1;
}
- buffer = rpath(ctx, path);
- ret = lsetxattr(buffer, name, value, size, flags);
- g_free(buffer);
- return ret;
+ return local_setxattr_nofollow(ctx, path, name, value, size, flags);
}
static int mp_user_removexattr(FsContext *ctx,
const char *path, const char *name)
{
- char *buffer;
- int ret;
-
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namesapce
@@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx,
errno = EACCES;
return -1;
}
- buffer = rpath(ctx, path);
- ret = lremovexattr(buffer, name);
- g_free(buffer);
- return ret;
+ return local_removexattr_nofollow(ctx, path, name);
}
XattrOperations mapped_user_xattr = {
diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c
index 5d8595e..eec160b 100644
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -15,6 +15,8 @@
#include "9p.h"
#include "fsdev/file-op-9p.h"
#include "9p-xattr.h"
+#include "9p-util.h"
+#include "9p-local.h"
static XattrOperations *get_xattr_operations(XattrOperations **h,
@@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
return name_size;
}
+static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+ char *list, size_t size)
+{
+ char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+ int ret;
+
+ ret = llistxattr(proc_path, list, size);
+ g_free(proc_path);
+ return ret;
+}
/*
* Get the list and pass to each layer to find out whether
@@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
void *value, size_t vsize)
{
ssize_t size = 0;
- char *buffer;
void *ovalue = value;
XattrOperations *xops;
char *orig_value, *orig_value_start;
ssize_t xattr_len, parsed_len = 0, attr_len;
+ char *dirpath, *name;
+ int dirfd;
/* Get the actual len */
- buffer = rpath(ctx, path);
- xattr_len = llistxattr(buffer, value, 0);
+ dirpath = g_path_get_dirname(path);
+ dirfd = local_opendir_nofollow(ctx, dirpath);
+ g_free(dirpath);
+ if (dirfd == -1) {
+ return -1;
+ }
+
+ name = g_path_get_basename(path);
+ xattr_len = flistxattrat_nofollow(dirfd, name, value, 0);
if (xattr_len <= 0) {
- g_free(buffer);
+ g_free(name);
+ close_preserve_errno(dirfd);
return xattr_len;
}
/* Now fetch the xattr and find the actual size */
orig_value = g_malloc(xattr_len);
- xattr_len = llistxattr(buffer, orig_value, xattr_len);
- g_free(buffer);
+ xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len);
+ g_free(name);
+ close_preserve_errno(dirfd);
+ if (xattr_len < 0) {
+ return -1;
+ }
/* store the orig pointer */
orig_value_start = orig_value;
@@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx,
}
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+ const char *name, void *value, size_t size)
+{
+ char *dirpath = g_path_get_dirname(path);
+ char *filename = g_path_get_basename(path);
+ int dirfd;
+ ssize_t ret = -1;
+
+ dirfd = local_opendir_nofollow(ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
+
+ ret = fgetxattrat_nofollow(dirfd, filename, name, value, size);
+ close_preserve_errno(dirfd);
+out:
+ g_free(dirpath);
+ g_free(filename);
+ return ret;
+}
+
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return local_getxattr_nofollow(ctx, path, name, value, size);
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+ char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+ int ret;
+
+ ret = lsetxattr(proc_path, name, value, size, flags);
+ g_free(proc_path);
+ return ret;
+}
+
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+ const char *name, void *value, size_t size,
+ int flags)
+{
+ char *dirpath = g_path_get_dirname(path);
+ char *filename = g_path_get_basename(path);
+ int dirfd;
+ ssize_t ret = -1;
+
+ dirfd = local_opendir_nofollow(ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
+
+ ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags);
+ close_preserve_errno(dirfd);
+out:
+ g_free(dirpath);
+ g_free(filename);
+ return ret;
+}
+
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+ size_t size, int flags)
+{
+ return local_setxattr_nofollow(ctx, path, name, value, size, flags);
+}
+
+static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+ const char *name)
+{
+ char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+ int ret;
+
+ ret = lremovexattr(proc_path, name);
+ g_free(proc_path);
+ return ret;
+}
+
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+ const char *name)
+{
+ char *dirpath = g_path_get_dirname(path);
+ char *filename = g_path_get_basename(path);
+ int dirfd;
+ ssize_t ret = -1;
+
+ dirfd = local_opendir_nofollow(ctx, dirpath);
+ if (dirfd == -1) {
+ goto out;
+ }
+
+ ret = fremovexattrat_nofollow(dirfd, filename, name);
+ close_preserve_errno(dirfd);
+out:
+ g_free(dirpath);
+ g_free(filename);
+ return ret;
+}
+
+int pt_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+ return local_removexattr_nofollow(ctx, path, name);
+}
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size)
+{
+ errno = ENOTSUP;
+ return -1;
+}
+
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size, int flags)
+{
+ errno = ENOTSUP;
+ return -1;
+}
+
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+ void *value, size_t size)
+{
+ return 0;
+}
+
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+ errno = ENOTSUP;
+ return -1;
+}
+
XattrOperations *mapped_xattr_ops[] = {
&mapped_user_xattr,
&mapped_pacl_xattr,
diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h
index a853ea6..0d83996 100644
--- a/hw/9pfs/9p-xattr.h
+++ b/hw/9pfs/9p-xattr.h
@@ -29,6 +29,13 @@ typedef struct xattr_operations
const char *path, const char *name);
} XattrOperations;
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+ const char *name, void *value, size_t size);
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+ const char *name, void *value, size_t size,
+ int flags);
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+ const char *name);
extern XattrOperations mapped_user_xattr;
extern XattrOperations passthrough_user_xattr;
@@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value,
int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags);
int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name);
+
ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value,
size_t size);
-
-static inline ssize_t pt_getxattr(FsContext *ctx, const char *path,
- const char *name, void *value, size_t size)
-{
- char *buffer;
- ssize_t ret;
-
- buffer = rpath(ctx, path);
- ret = lgetxattr(buffer, name, value, size);
- g_free(buffer);
- return ret;
-}
-
-static inline int pt_setxattr(FsContext *ctx, const char *path,
- const char *name, void *value,
- size_t size, int flags)
-{
- char *buffer;
- int ret;
-
- buffer = rpath(ctx, path);
- ret = lsetxattr(buffer, name, value, size, flags);
- g_free(buffer);
- return ret;
-}
-
-static inline int pt_removexattr(FsContext *ctx,
- const char *path, const char *name)
-{
- char *buffer;
- int ret;
-
- buffer = rpath(ctx, path);
- ret = lremovexattr(path, name);
- g_free(buffer);
- return ret;
-}
-
-static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path,
- const char *name, void *value,
- size_t size)
-{
- errno = ENOTSUP;
- return -1;
-}
-
-static inline int notsup_setxattr(FsContext *ctx, const char *path,
- const char *name, void *value,
- size_t size, int flags)
-{
- errno = ENOTSUP;
- return -1;
-}
-
-static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path,
- char *name, void *value, size_t size)
-{
- return 0;
-}
-
-static inline int notsup_removexattr(FsContext *ctx,
- const char *path, const char *name)
-{
- errno = ENOTSUP;
- return -1;
-}
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size);
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+ size_t size, int flags);
+int pt_removexattr(FsContext *ctx, const char *path, const char *name);
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size);
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+ void *value, size_t size, int flags);
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+ void *value, size_t size);
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name);
#endif
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index da0ae0c..32197e6 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y = 9p.o
+common-obj-y = 9p.o 9p-util.o
common-obj-y += 9p-local.o 9p-xattr.o
common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
common-obj-y += coth.o cofs.o codir.o cofile.o
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index d957d1e..2b0f3e1 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -49,7 +49,6 @@
#define ACPI_PCIHP_ADDR 0xae00
#define ACPI_PCIHP_SIZE 0x0014
-#define ACPI_PCIHP_LEGACY_SIZE 0x000f
#define PCI_UP_BASE 0x0000
#define PCI_DOWN_BASE 0x0004
#define PCI_EJ_BASE 0x0008
@@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
s->root= root_bus;
s->legacy_piix = !bridges_enabled;
- if (s->legacy_piix) {
- unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
-
- s->io_len = ACPI_PCIHP_LEGACY_SIZE;
-
- *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
- object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
- bus_bsel, NULL);
- }
-
memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
"acpi-pci-hotplug", s->io_len);
memory_region_add_subregion(address_space_io, s->io_base, &s->io);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 6d99fe4..a553a7e 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
{
PIIX4PMState *s = opaque;
+ /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
+ * and it's not hot-unpluggable */
qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
}
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 0c9ca7b..c8a11f2 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
+#include "hw/arm/armv7m.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
@@ -17,147 +18,260 @@
#include "elf.h"
#include "sysemu/qtest.h"
#include "qemu/error-report.h"
+#include "exec/address-spaces.h"
/* Bitbanded IO. Each word corresponds to a single bit. */
/* Get the byte address of the real memory for a bitband access. */
-static inline uint32_t bitband_addr(void * opaque, uint32_t addr)
+static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset)
{
- uint32_t res;
-
- res = *(uint32_t *)opaque;
- res |= (addr & 0x1ffffff) >> 5;
- return res;
-
+ return s->base | (offset & 0x1ffffff) >> 5;
}
-static uint32_t bitband_readb(void *opaque, hwaddr offset)
+static MemTxResult bitband_read(void *opaque, hwaddr offset,
+ uint64_t *data, unsigned size, MemTxAttrs attrs)
{
- uint8_t v;
- cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1);
- return (v & (1 << ((offset >> 2) & 7))) != 0;
+ BitBandState *s = opaque;
+ uint8_t buf[4];
+ MemTxResult res;
+ int bitpos, bit;
+ hwaddr addr;
+
+ assert(size <= 4);
+
+ /* Find address in underlying memory and round down to multiple of size */
+ addr = bitband_addr(s, offset) & (-size);
+ res = address_space_read(s->source_as, addr, attrs, buf, size);
+ if (res) {
+ return res;
+ }
+ /* Bit position in the N bytes read... */
+ bitpos = (offset >> 2) & ((size * 8) - 1);
+ /* ...converted to byte in buffer and bit in byte */
+ bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1;
+ *data = bit;
+ return MEMTX_OK;
}
-static void bitband_writeb(void *opaque, hwaddr offset,
- uint32_t value)
+static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size, MemTxAttrs attrs)
{
- uint32_t addr;
- uint8_t mask;
- uint8_t v;
- addr = bitband_addr(opaque, offset);
- mask = (1 << ((offset >> 2) & 7));
- cpu_physical_memory_read(addr, &v, 1);
- if (value & 1)
- v |= mask;
- else
- v &= ~mask;
- cpu_physical_memory_write(addr, &v, 1);
+ BitBandState *s = opaque;
+ uint8_t buf[4];
+ MemTxResult res;
+ int bitpos, bit;
+ hwaddr addr;
+
+ assert(size <= 4);
+
+ /* Find address in underlying memory and round down to multiple of size */
+ addr = bitband_addr(s, offset) & (-size);
+ res = address_space_read(s->source_as, addr, attrs, buf, size);
+ if (res) {
+ return res;
+ }
+ /* Bit position in the N bytes read... */
+ bitpos = (offset >> 2) & ((size * 8) - 1);
+ /* ...converted to byte in buffer and bit in byte */
+ bit = 1 << (bitpos & 7);
+ if (value & 1) {
+ buf[bitpos >> 3] |= bit;
+ } else {
+ buf[bitpos >> 3] &= ~bit;
+ }
+ return address_space_write(s->source_as, addr, attrs, buf, size);
}
-static uint32_t bitband_readw(void *opaque, hwaddr offset)
+static const MemoryRegionOps bitband_ops = {
+ .read_with_attrs = bitband_read,
+ .write_with_attrs = bitband_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 4,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
+};
+
+static void bitband_init(Object *obj)
{
- uint32_t addr;
- uint16_t mask;
- uint16_t v;
- addr = bitband_addr(opaque, offset) & ~1;
- mask = (1 << ((offset >> 2) & 15));
- mask = tswap16(mask);
- cpu_physical_memory_read(addr, &v, 2);
- return (v & mask) != 0;
+ BitBandState *s = BITBAND(obj);
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+
+ object_property_add_link(obj, "source-memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&s->source_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ &error_abort);
+ memory_region_init_io(&s->iomem, obj, &bitband_ops, s,
+ "bitband", 0x02000000);
+ sysbus_init_mmio(dev, &s->iomem);
}
-static void bitband_writew(void *opaque, hwaddr offset,
- uint32_t value)
+static void bitband_realize(DeviceState *dev, Error **errp)
{
- uint32_t addr;
- uint16_t mask;
- uint16_t v;
- addr = bitband_addr(opaque, offset) & ~1;
- mask = (1 << ((offset >> 2) & 15));
- mask = tswap16(mask);
- cpu_physical_memory_read(addr, &v, 2);
- if (value & 1)
- v |= mask;
- else
- v &= ~mask;
- cpu_physical_memory_write(addr, &v, 2);
+ BitBandState *s = BITBAND(dev);
+
+ if (!s->source_memory) {
+ error_setg(errp, "source-memory property not set");
+ return;
+ }
+
+ s->source_as = address_space_init_shareable(s->source_memory,
+ "bitband-source");
}
-static uint32_t bitband_readl(void *opaque, hwaddr offset)
+/* Board init. */
+
+static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = {
+ 0x20000000, 0x40000000
+};
+
+static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = {
+ 0x22000000, 0x42000000
+};
+
+static void armv7m_instance_init(Object *obj)
{
- uint32_t addr;
- uint32_t mask;
- uint32_t v;
- addr = bitband_addr(opaque, offset) & ~3;
- mask = (1 << ((offset >> 2) & 31));
- mask = tswap32(mask);
- cpu_physical_memory_read(addr, &v, 4);
- return (v & mask) != 0;
+ ARMv7MState *s = ARMV7M(obj);
+ int i;
+
+ /* Can't init the cpu here, we don't yet know which model to use */
+
+ object_property_add_link(obj, "memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&s->board_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ &error_abort);
+ memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX);
+
+ object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic");
+ qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default());
+ object_property_add_alias(obj, "num-irq",
+ OBJECT(&s->nvic), "num-irq", &error_abort);
+
+ for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+ object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND);
+ qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default());
+ }
}
-static void bitband_writel(void *opaque, hwaddr offset,
- uint32_t value)
+static void armv7m_realize(DeviceState *dev, Error **errp)
{
- uint32_t addr;
- uint32_t mask;
- uint32_t v;
- addr = bitband_addr(opaque, offset) & ~3;
- mask = (1 << ((offset >> 2) & 31));
- mask = tswap32(mask);
- cpu_physical_memory_read(addr, &v, 4);
- if (value & 1)
- v |= mask;
- else
- v &= ~mask;
- cpu_physical_memory_write(addr, &v, 4);
-}
+ ARMv7MState *s = ARMV7M(dev);
+ SysBusDevice *sbd;
+ Error *err = NULL;
+ int i;
+ char **cpustr;
+ ObjectClass *oc;
+ const char *typename;
+ CPUClass *cc;
+
+ if (!s->board_memory) {
+ error_setg(errp, "memory property was not set");
+ return;
+ }
-static const MemoryRegionOps bitband_ops = {
- .old_mmio = {
- .read = { bitband_readb, bitband_readw, bitband_readl, },
- .write = { bitband_writeb, bitband_writew, bitband_writel, },
- },
- .endianness = DEVICE_NATIVE_ENDIAN,
-};
+ memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);
-#define TYPE_BITBAND "ARM,bitband-memory"
-#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
+ cpustr = g_strsplit(s->cpu_model, ",", 2);
-typedef struct {
- /*< private >*/
- SysBusDevice parent_obj;
- /*< public >*/
+ oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
+ if (!oc) {
+ error_setg(errp, "Unknown CPU model %s", cpustr[0]);
+ g_strfreev(cpustr);
+ return;
+ }
- MemoryRegion iomem;
- uint32_t base;
-} BitBandState;
+ cc = CPU_CLASS(oc);
+ typename = object_class_get_name(oc);
+ cc->parse_features(typename, cpustr[1], &err);
+ g_strfreev(cpustr);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
-static void bitband_init(Object *obj)
-{
- BitBandState *s = BITBAND(obj);
- SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+ s->cpu = ARM_CPU(object_new(typename));
+ if (!s->cpu) {
+ error_setg(errp, "Unknown CPU model %s", s->cpu_model);
+ return;
+ }
- memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base,
- "bitband", 0x02000000);
- sysbus_init_mmio(dev, &s->iomem);
+ object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory",
+ &error_abort);
+ object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ /* Note that we must realize the NVIC after the CPU */
+ object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ /* Alias the NVIC's input and output GPIOs as our own so the board
+ * code can wire them up. (We do this in realize because the
+ * NVIC doesn't create the input GPIO array until realize.)
+ */
+ qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL);
+ qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ");
+
+ /* Wire the NVIC up to the CPU */
+ sbd = SYS_BUS_DEVICE(&s->nvic);
+ sysbus_connect_irq(sbd, 0,
+ qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
+ s->cpu->env.nvic = &s->nvic;
+
+ memory_region_add_subregion(&s->container, 0xe000e000,
+ sysbus_mmio_get_region(sbd, 0));
+
+ for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+ Object *obj = OBJECT(&s->bitband[i]);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]);
+
+ object_property_set_int(obj, bitband_input_addr[i], "base", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+ object_property_set_link(obj, OBJECT(s->board_memory),
+ "source-memory", &error_abort);
+ object_property_set_bool(obj, true, "realized", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ memory_region_add_subregion(&s->container, bitband_output_addr[i],
+ sysbus_mmio_get_region(sbd, 0));
+ }
}
-static void armv7m_bitband_init(void)
-{
- DeviceState *dev;
+static Property armv7m_properties[] = {
+ DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model),
+ DEFINE_PROP_END_OF_LIST(),
+};
- dev = qdev_create(NULL, TYPE_BITBAND);
- qdev_prop_set_uint32(dev, "base", 0x20000000);
- qdev_init_nofail(dev);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000);
+static void armv7m_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
- dev = qdev_create(NULL, TYPE_BITBAND);
- qdev_prop_set_uint32(dev, "base", 0x40000000);
- qdev_init_nofail(dev);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000);
+ dc->realize = armv7m_realize;
+ dc->props = armv7m_properties;
}
-/* Board init. */
+static const TypeInfo armv7m_info = {
+ .name = TYPE_ARMV7M,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(ARMv7MState),
+ .instance_init = armv7m_instance_init,
+ .class_init = armv7m_class_init,
+};
static void armv7m_reset(void *opaque)
{
@@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque)
/* Init CPU and memory for a v7-M based board.
mem_size is in bytes.
- Returns the NVIC array. */
+ Returns the ARMv7M device. */
DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
const char *kernel_filename, const char *cpu_model)
{
- ARMCPU *cpu;
- CPUARMState *env;
- DeviceState *nvic;
- int image_size;
- uint64_t entry;
- uint64_t lowaddr;
- int big_endian;
+ DeviceState *armv7m;
if (cpu_model == NULL) {
- cpu_model = "cortex-m3";
- }
- cpu = cpu_arm_init(cpu_model);
- if (cpu == NULL) {
- fprintf(stderr, "Unable to find CPU definition\n");
- exit(1);
+ cpu_model = "cortex-m3";
}
- env = &cpu->env;
- armv7m_bitband_init();
+ armv7m = qdev_create(NULL, "armv7m");
+ qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
+ qdev_prop_set_string(armv7m, "cpu-model", cpu_model);
+ object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
+ "memory", &error_abort);
+ /* This will exit with an error if the user passed us a bad cpu_model */
+ qdev_init_nofail(armv7m);
+
+ armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
+ return armv7m;
+}
- nvic = qdev_create(NULL, "armv7m_nvic");
- qdev_prop_set_uint32(nvic, "num-irq", num_irq);
- env->nvic = nvic;
- qdev_init_nofail(nvic);
- sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0,
- qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ));
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
+{
+ int image_size;
+ uint64_t entry;
+ uint64_t lowaddr;
+ int big_endian;
#ifdef TARGET_WORDS_BIGENDIAN
big_endian = 1;
@@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
}
}
+ /* CPU objects (unlike devices) are not automatically reset on system
+ * reset, so we must always register a handler to do so. Unlike
+ * A-profile CPUs, we don't need to do anything special in the
+ * handler to arrange that it starts correctly.
+ * This is arguably the wrong place to do this, but it matches the
+ * way A-profile does it. Note that this means that every M profile
+ * board must call this function!
+ */
qemu_register_reset(armv7m_reset, cpu);
- return nvic;
}
static Property bitband_properties[] = {
@@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ dc->realize = bitband_realize;
dc->props = bitband_properties;
}
@@ -251,6 +371,7 @@ static const TypeInfo bitband_info = {
static void armv7m_register_types(void)
{
type_register_static(&bitband_info);
+ type_register_static(&armv7m_info);
}
type_init(armv7m_register_types)
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 9ed22d5..369ef1e 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -96,6 +96,11 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
+ /* SDHOST */
+ object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST);
+ object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL);
+ qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default());
+
/* DMA Channels */
object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
@@ -103,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
+
+ /* GPIO */
+ object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO);
+ object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL);
+ qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default());
+
+ object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
+ OBJECT(&s->sdhci.sdbus), &error_abort);
+ object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
+ OBJECT(&s->sdhost.sdbus), &error_abort);
}
static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -267,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_ARASANSDIO));
- object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus",
- &err);
+
+ /* SDHOST */
+ object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
+ memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0));
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0,
+ qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
+ INTERRUPT_SDIO));
+
/* DMA Channels */
object_property_set_bool(OBJECT(&s->dma), true, "realized", &err);
if (err) {
@@ -292,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
BCM2835_IC_GPU_IRQ,
INTERRUPT_DMA0 + n));
}
+
+ /* GPIO */
+ object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0));
+
+ object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus",
+ &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
}
static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c
index 23d7928..3cfe332 100644
--- a/hw/arm/netduino2.c
+++ b/hw/arm/netduino2.c
@@ -27,17 +27,18 @@
#include "hw/boards.h"
#include "qemu/error-report.h"
#include "hw/arm/stm32f205_soc.h"
+#include "hw/arm/arm.h"
static void netduino2_init(MachineState *machine)
{
DeviceState *dev;
dev = qdev_create(NULL, TYPE_STM32F205_SOC);
- if (machine->kernel_filename) {
- qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename);
- }
qdev_prop_set_string(dev, "cpu-model", "cortex-m3");
object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal);
+
+ armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
+ FLASH_SIZE);
}
static void netduino2_machine_init(MachineClass *mc)
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 38425bd..6e1260d 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj)
STM32F205State *s = STM32F205_SOC(obj);
int i;
+ object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
+ qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default());
+
object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG);
qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default());
@@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj)
static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
{
STM32F205State *s = STM32F205_SOC(dev_soc);
- DeviceState *dev, *nvic;
+ DeviceState *dev, *armv7m;
SysBusDevice *busdev;
Error *err = NULL;
int i;
@@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
vmstate_register_ram_global(sram);
memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
- nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
- s->kernel_filename, s->cpu_model);
+ armv7m = DEVICE(&s->armv7m);
+ qdev_prop_set_uint32(armv7m, "num-irq", 96);
+ qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model);
+ object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()),
+ "memory", &error_abort);
+ object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
/* System configuration controller */
dev = DEVICE(&s->syscfg);
@@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, 0x40013800);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71));
/* Attach UART (uses USART registers) and USART controllers */
for (i = 0; i < STM_NUM_USARTS; i++) {
@@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, usart_addr[i]);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i]));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
}
/* Timer 2 to 5 */
@@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, timer_addr[i]);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i]));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i]));
}
/* ADC 1 to 3 */
@@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
return;
}
qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0,
- qdev_get_gpio_in(nvic, ADC_IRQ));
+ qdev_get_gpio_in(armv7m, ADC_IRQ));
for (i = 0; i < STM_NUM_ADCS; i++) {
dev = DEVICE(&(s->adc[i]));
@@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, spi_addr[i]);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i]));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i]));
}
}
static Property stm32f205_soc_properties[] = {
- DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename),
DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/block/block.c b/hw/block/block.c
index 8dc9d84..27878d0 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf)
}
}
-void blkconf_apply_backend_options(BlockConf *conf)
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+ bool resizable, Error **errp)
{
BlockBackend *blk = conf->blk;
BlockdevOnError rerror, werror;
+ uint64_t perm, shared_perm;
bool wce;
+ int ret;
+
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (!readonly) {
+ perm |= BLK_PERM_WRITE;
+ }
+
+ shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_GRAPH_MOD;
+ if (resizable) {
+ shared_perm |= BLK_PERM_RESIZE;
+ }
+ if (conf->share_rw) {
+ shared_perm |= BLK_PERM_WRITE;
+ }
+
+ ret = blk_set_perm(blk, perm, shared_perm, errp);
+ if (ret < 0) {
+ return;
+ }
switch (conf->wce) {
case ON_OFF_AUTO_ON: wce = true; break;
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 17d29e7..a328693 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -186,6 +186,7 @@ typedef enum FDiskFlags {
struct FDrive {
FDCtrl *fdctrl;
BlockBackend *blk;
+ BlockConf *conf;
/* Drive status */
FloppyDriveType drive; /* CMOS drive type */
uint8_t perpendicular; /* 2.88 MB access mode */
@@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv)
}
}
-static void fd_change_cb(void *opaque, bool load)
+static void fd_change_cb(void *opaque, bool load, Error **errp)
{
FDrive *drive = opaque;
+ Error *local_err = NULL;
+
+ if (!load) {
+ blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
+ } else {
+ blkconf_apply_backend_options(drive->conf,
+ blk_is_read_only(drive->blk), false,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
drive->media_changed = 1;
drive->media_validated = false;
@@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev)
FloppyDrive *dev = FLOPPY_DRIVE(qdev);
FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
FDrive *drive;
+ Error *local_err = NULL;
int ret;
if (dev->unit == -1) {
@@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev)
if (!dev->conf.blk) {
/* Anonymous BlockBackend for an empty drive */
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
ret = blk_attach_dev(dev->conf.blk, qdev);
assert(ret == 0);
}
@@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev)
* blkconf_apply_backend_options(). */
dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
- blkconf_apply_backend_options(&dev->conf);
+
+ blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
+ false, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
/* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
* for empty drives. */
@@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev)
return -1;
}
+ drive->conf = &dev->conf;
drive->blk = dev->conf.blk;
drive->fdctrl = bus->fdc;
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 2d6eb46..190573c 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
{
Flash *s = M25P80(ss);
M25P80Class *mc = M25P80_GET_CLASS(s);
+ int ret;
s->pi = mc->pi;
@@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
s->dirty_page = -1;
if (s->blk) {
+ uint64_t perm = BLK_PERM_CONSISTENT_READ |
+ (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+
DB_PRINT_L(0, "Binding to IF_MTD drive\n");
s->storage = blk_blockalign(s->blk, s->size);
diff --git a/hw/block/nand.c b/hw/block/nand.c
index c69e675..0d33ac2 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp)
{
int pagesize;
NANDFlashState *s = NAND(dev);
+ int ret;
+
s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
s->size = nand_flash_ids[s->chip_id].size << 20;
@@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp)
error_setg(errp, "Can't use a read-only drive");
return;
}
+ ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
if (blk_getlength(s->blk) >=
(s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
pagesize = 0;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ae91a18..ae303d4 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev)
int i;
int64_t bs_size;
uint8_t *pci_conf;
+ Error *local_err = NULL;
if (!n->conf.blk) {
return -1;
@@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev)
return -1;
}
blkconf_blocksizes(&n->conf);
- blkconf_apply_backend_options(&n->conf);
+ blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+ false, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
pci_conf = pci_dev->config;
pci_conf[PCI_INTERRUPT_PIN] = 1;
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 8d84227..ddf5492 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd)
OneNANDState *s = ONE_NAND(dev);
uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
void *ram;
+ Error *local_err = NULL;
s->base = (hwaddr)-1;
s->rdy = NULL;
@@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd)
error_report("Can't use a read-only drive");
return -1;
}
+ blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
s->blk_cur = s->blk;
}
s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 71b98a3..594d4cf 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
if (pfl->blk) {
+ uint64_t perm;
+ pfl->ro = blk_is_read_only(pfl->blk);
+ perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+ } else {
+ pfl->ro = 0;
+ }
+
+ if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
@@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
}
}
- if (pfl->blk) {
- pfl->ro = blk_is_read_only(pfl->blk);
- } else {
- pfl->ro = 0;
- }
-
/* Default to devices being used at their maximum device width. This was
* assumed before the device_width support was added.
*/
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index ef71322..e6c5c6c 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
pfl->chip_len = chip_len;
+
+ if (pfl->blk) {
+ uint64_t perm;
+ pfl->ro = blk_is_read_only(pfl->blk);
+ perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+ } else {
+ pfl->ro = 0;
+ }
+
if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
@@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
pfl->rom_mode = 1;
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
- if (pfl->blk) {
- pfl->ro = blk_is_read_only(pfl->blk);
- } else {
- pfl->ro = 0;
- }
-
pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl);
pfl->wcycle = 0;
pfl->cmd = 0;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 843bd2f..98c16a7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
blkconf_serial(&conf->conf, &conf->serial);
- blkconf_apply_backend_options(&conf->conf);
+ blkconf_apply_backend_options(&conf->conf,
+ blk_is_read_only(conf->conf.blk), true,
+ &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
s->original_wce = blk_enable_write_cache(conf->conf.blk);
blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
if (err) {
diff --git a/hw/core/bus.c b/hw/core/bus.c
index cf383fc..4651f24 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj)
TYPE_HOTPLUG_HANDLER,
(Object **)&bus->hotplug_handler,
object_property_allow_set_link,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ 0,
NULL);
object_property_add_bool(obj, "realized",
bus_get_realized, bus_set_realized, NULL);
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 8b980e9..bf17b42 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -435,6 +435,19 @@ int load_elf_as(const char *filename,
uint64_t *highaddr, int big_endian, int elf_machine,
int clear_lsb, int data_swab, AddressSpace *as)
{
+ return load_elf_ram(filename, translate_fn, translate_opaque,
+ pentry, lowaddr, highaddr, big_endian, elf_machine,
+ clear_lsb, data_swab, as, true);
+}
+
+/* return < 0 if error, otherwise the number of bytes loaded in memory */
+int load_elf_ram(const char *filename,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+ uint64_t *highaddr, int big_endian, int elf_machine,
+ int clear_lsb, int data_swab, AddressSpace *as,
+ bool load_rom)
+{
int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
uint8_t e_ident[EI_NIDENT];
@@ -473,11 +486,11 @@ int load_elf_as(const char *filename,
if (e_ident[EI_CLASS] == ELFCLASS64) {
ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
- data_swab, as);
+ data_swab, as, load_rom);
} else {
ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
- data_swab, as);
+ data_swab, as, load_rom);
}
fail:
diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c
index 3af82af..59ccb00 100644
--- a/hw/core/ptimer.c
+++ b/hw/core/ptimer.c
@@ -12,6 +12,7 @@
#include "qemu/host-utils.h"
#include "sysemu/replay.h"
#include "sysemu/qtest.h"
+#include "block/aio.h"
#define DELTA_ADJUST 1
#define DELTA_NO_ADJUST -1
@@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask)
s->policy_mask = policy_mask;
return s;
}
+
+void ptimer_free(ptimer_state *s)
+{
+ qemu_bh_delete(s->bh);
+ timer_free(s->timer);
+ g_free(s);
+}
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 94f4d8b..c34be1c 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
{
BlockBackend *blk;
bool blk_created = false;
+ int ret;
blk = blk_by_name(str);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
if (bs) {
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(0, BLK_PERM_ALL);
blk_created = true;
+
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
}
if (!blk) {
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 06ba02e..1e7fb33 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,6 +37,7 @@
#include "hw/boards.h"
#include "hw/sysbus.h"
#include "qapi-event.h"
+#include "migration/migration.h"
int qdev_hotplug = 0;
static bool qdev_hot_added = false;
@@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child)
void qdev_set_parent_bus(DeviceState *dev, BusState *bus)
{
+ bool replugging = dev->parent_bus != NULL;
+
+ if (replugging) {
+ /* Keep a reference to the device while it's not plugged into
+ * any bus, to avoid it potentially evaporating when it is
+ * dereffed in bus_remove_child().
+ */
+ object_ref(OBJECT(dev));
+ bus_remove_child(dev->parent_bus, dev);
+ object_unref(OBJECT(dev->parent_bus));
+ }
dev->parent_bus = bus;
object_ref(OBJECT(bus));
bus_add_child(bus, dev);
+ if (replugging) {
+ object_unref(OBJECT(dev));
+ }
}
/* Create a new device. This only initializes the device state
@@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
Error *local_err = NULL;
bool unattached_parent = false;
static int unattached_count;
+ int ret;
if (dev->hotplugged && !dc->hotpluggable) {
error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
}
if (value && !dev->realized) {
+ ret = check_migratable(obj, &local_err);
+ if (ret < 0) {
+ goto fail;
+ }
+
if (!obj->parent) {
gchar *name = g_strdup_printf("device[%d]", unattached_count++);
diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs
index a43c7cf..fa0a72e 100644
--- a/hw/gpio/Makefile.objs
+++ b/hw/gpio/Makefile.objs
@@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o
obj-$(CONFIG_OMAP) += omap_gpio.o
obj-$(CONFIG_IMX) += imx_gpio.o
+obj-$(CONFIG_RASPI) += bcm2835_gpio.o
diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c
new file mode 100644
index 0000000..acc2e3c
--- /dev/null
+++ b/hw/gpio/bcm2835_gpio.c
@@ -0,0 +1,353 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ * Clement Deschamps <clement.deschamps@antfield.fr>
+ * Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+#include "hw/gpio/bcm2835_gpio.h"
+
+#define GPFSEL0 0x00
+#define GPFSEL1 0x04
+#define GPFSEL2 0x08
+#define GPFSEL3 0x0C
+#define GPFSEL4 0x10
+#define GPFSEL5 0x14
+#define GPSET0 0x1C
+#define GPSET1 0x20
+#define GPCLR0 0x28
+#define GPCLR1 0x2C
+#define GPLEV0 0x34
+#define GPLEV1 0x38
+#define GPEDS0 0x40
+#define GPEDS1 0x44
+#define GPREN0 0x4C
+#define GPREN1 0x50
+#define GPFEN0 0x58
+#define GPFEN1 0x5C
+#define GPHEN0 0x64
+#define GPHEN1 0x68
+#define GPLEN0 0x70
+#define GPLEN1 0x74
+#define GPAREN0 0x7C
+#define GPAREN1 0x80
+#define GPAFEN0 0x88
+#define GPAFEN1 0x8C
+#define GPPUD 0x94
+#define GPPUDCLK0 0x98
+#define GPPUDCLK1 0x9C
+
+static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg)
+{
+ int i;
+ uint32_t value = 0;
+ for (i = 0; i < 10; i++) {
+ uint32_t index = 10 * reg + i;
+ if (index < sizeof(s->fsel)) {
+ value |= (s->fsel[index] & 0x7) << (3 * i);
+ }
+ }
+ return value;
+}
+
+static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value)
+{
+ int i;
+ for (i = 0; i < 10; i++) {
+ uint32_t index = 10 * reg + i;
+ if (index < sizeof(s->fsel)) {
+ int fsel = (value >> (3 * i)) & 0x7;
+ s->fsel[index] = fsel;
+ }
+ }
+
+ /* SD controller selection (48-53) */
+ if (s->sd_fsel != 0
+ && (s->fsel[48] == 0) /* SD_CLK_R */
+ && (s->fsel[49] == 0) /* SD_CMD_R */
+ && (s->fsel[50] == 0) /* SD_DATA0_R */
+ && (s->fsel[51] == 0) /* SD_DATA1_R */
+ && (s->fsel[52] == 0) /* SD_DATA2_R */
+ && (s->fsel[53] == 0) /* SD_DATA3_R */
+ ) {
+ /* SDHCI controller selected */
+ sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci);
+ s->sd_fsel = 0;
+ } else if (s->sd_fsel != 4
+ && (s->fsel[48] == 4) /* SD_CLK_R */
+ && (s->fsel[49] == 4) /* SD_CMD_R */
+ && (s->fsel[50] == 4) /* SD_DATA0_R */
+ && (s->fsel[51] == 4) /* SD_DATA1_R */
+ && (s->fsel[52] == 4) /* SD_DATA2_R */
+ && (s->fsel[53] == 4) /* SD_DATA3_R */
+ ) {
+ /* SDHost controller selected */
+ sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost);
+ s->sd_fsel = 4;
+ }
+}
+
+static int gpfsel_is_out(BCM2835GpioState *s, int index)
+{
+ if (index >= 0 && index < 54) {
+ return s->fsel[index] == 1;
+ }
+ return 0;
+}
+
+static void gpset(BCM2835GpioState *s,
+ uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+ uint32_t changes = val & ~*lev;
+ uint32_t cur = 1;
+
+ int i;
+ for (i = 0; i < count; i++) {
+ if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+ qemu_set_irq(s->out[start + i], 1);
+ }
+ cur <<= 1;
+ }
+
+ *lev |= val;
+}
+
+static void gpclr(BCM2835GpioState *s,
+ uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+ uint32_t changes = val & *lev;
+ uint32_t cur = 1;
+
+ int i;
+ for (i = 0; i < count; i++) {
+ if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+ qemu_set_irq(s->out[start + i], 0);
+ }
+ cur <<= 1;
+ }
+
+ *lev &= ~val;
+}
+
+static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+ switch (offset) {
+ case GPFSEL0:
+ case GPFSEL1:
+ case GPFSEL2:
+ case GPFSEL3:
+ case GPFSEL4:
+ case GPFSEL5:
+ return gpfsel_get(s, offset / 4);
+ case GPSET0:
+ case GPSET1:
+ /* Write Only */
+ return 0;
+ case GPCLR0:
+ case GPCLR1:
+ /* Write Only */
+ return 0;
+ case GPLEV0:
+ return s->lev0;
+ case GPLEV1:
+ return s->lev1;
+ case GPEDS0:
+ case GPEDS1:
+ case GPREN0:
+ case GPREN1:
+ case GPFEN0:
+ case GPFEN1:
+ case GPHEN0:
+ case GPHEN1:
+ case GPLEN0:
+ case GPLEN1:
+ case GPAREN0:
+ case GPAREN1:
+ case GPAFEN0:
+ case GPAFEN1:
+ case GPPUD:
+ case GPPUDCLK0:
+ case GPPUDCLK1:
+ /* Not implemented */
+ return 0;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+ __func__, offset);
+ break;
+ }
+
+ return 0;
+}
+
+static void bcm2835_gpio_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+ switch (offset) {
+ case GPFSEL0:
+ case GPFSEL1:
+ case GPFSEL2:
+ case GPFSEL3:
+ case GPFSEL4:
+ case GPFSEL5:
+ gpfsel_set(s, offset / 4, value);
+ break;
+ case GPSET0:
+ gpset(s, value, 0, 32, &s->lev0);
+ break;
+ case GPSET1:
+ gpset(s, value, 32, 22, &s->lev1);
+ break;
+ case GPCLR0:
+ gpclr(s, value, 0, 32, &s->lev0);
+ break;
+ case GPCLR1:
+ gpclr(s, value, 32, 22, &s->lev1);
+ break;
+ case GPLEV0:
+ case GPLEV1:
+ /* Read Only */
+ break;
+ case GPEDS0:
+ case GPEDS1:
+ case GPREN0:
+ case GPREN1:
+ case GPFEN0:
+ case GPFEN1:
+ case GPHEN0:
+ case GPHEN1:
+ case GPLEN0:
+ case GPLEN1:
+ case GPAREN0:
+ case GPAREN1:
+ case GPAFEN0:
+ case GPAFEN1:
+ case GPPUD:
+ case GPPUDCLK0:
+ case GPPUDCLK1:
+ /* Not implemented */
+ break;
+ default:
+ goto err_out;
+ }
+ return;
+
+err_out:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+ __func__, offset);
+}
+
+static void bcm2835_gpio_reset(DeviceState *dev)
+{
+ BCM2835GpioState *s = BCM2835_GPIO(dev);
+
+ int i;
+ for (i = 0; i < 6; i++) {
+ gpfsel_set(s, i, 0);
+ }
+
+ s->sd_fsel = 0;
+
+ /* SDHCI is selected by default */
+ sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci);
+
+ s->lev0 = 0;
+ s->lev1 = 0;
+}
+
+static const MemoryRegionOps bcm2835_gpio_ops = {
+ .read = bcm2835_gpio_read,
+ .write = bcm2835_gpio_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_gpio = {
+ .name = "bcm2835_gpio",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54),
+ VMSTATE_UINT32(lev0, BCM2835GpioState),
+ VMSTATE_UINT32(lev1, BCM2835GpioState),
+ VMSTATE_UINT8(sd_fsel, BCM2835GpioState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void bcm2835_gpio_init(Object *obj)
+{
+ BCM2835GpioState *s = BCM2835_GPIO(obj);
+ DeviceState *dev = DEVICE(obj);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+ qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+ TYPE_SD_BUS, DEVICE(s), "sd-bus");
+
+ memory_region_init_io(&s->iomem, obj,
+ &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000);
+ sysbus_init_mmio(sbd, &s->iomem);
+ qdev_init_gpio_out(dev, s->out, 54);
+}
+
+static void bcm2835_gpio_realize(DeviceState *dev, Error **errp)
+{
+ BCM2835GpioState *s = BCM2835_GPIO(dev);
+ Object *obj;
+ Error *err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err);
+ if (obj == NULL) {
+ error_setg(errp, "%s: required sdhci link not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+ s->sdbus_sdhci = SD_BUS(obj);
+
+ obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err);
+ if (obj == NULL) {
+ error_setg(errp, "%s: required sdhost link not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+ s->sdbus_sdhost = SD_BUS(obj);
+}
+
+static void bcm2835_gpio_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->vmsd = &vmstate_bcm2835_gpio;
+ dc->realize = &bcm2835_gpio_realize;
+ dc->reset = &bcm2835_gpio_reset;
+}
+
+static const TypeInfo bcm2835_gpio_info = {
+ .name = TYPE_BCM2835_GPIO,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(BCM2835GpioState),
+ .instance_init = bcm2835_gpio_init,
+ .class_init = bcm2835_gpio_class_init,
+};
+
+static void bcm2835_gpio_register_types(void)
+{
+ type_register_static(&bcm2835_gpio_info);
+}
+
+type_init(bcm2835_gpio_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1c928ab..f44767b 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -462,7 +462,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
*bus_bsel = (*bsel_alloc)++;
object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL,
- bus_bsel, NULL);
+ bus_bsel, &error_abort);
}
return bsel_alloc;
@@ -471,7 +471,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
static void acpi_set_pci_info(void)
{
PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
- unsigned bsel_alloc = 0;
+ unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT;
if (bus) {
/* Scan all PCI buses. Set property to enable acpi based hotplug. */
diff --git a/hw/ide/core.c b/hw/ide/core.c
index cfa5de6..db509b3 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s)
}
/* called when the inserted state of the media has changed */
-static void ide_cd_change_cb(void *opaque, bool load)
+static void ide_cd_change_cb(void *opaque, bool load, Error **errp)
{
IDEState *s = opaque;
uint64_t nb_sectors;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index dbaa75c..4383cd1 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
} else {
/* Anonymous BlockBackend for an empty drive */
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
}
@@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
}
}
- blkconf_apply_backend_options(&dev->conf);
+ blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
+ &err);
+ if (err) {
+ error_report_err(err);
+ return -1;
+ }
if (ide_init_drive(s, dev->conf.blk, kind,
dev->version, dev->serial, dev->model, dev->wwn,
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 8948106..adedd0d 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o
obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o
obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o
obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o
-obj-$(CONFIG_STELLARIS) += armv7m_nvic.o
+obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o
obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o
obj-$(CONFIG_GRLIB) += grlib_irqmp.o
obj-$(CONFIG_IOAPIC) += ioapic.o
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 16b9b0f..c6493d6 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
}
};
+static int icc_sre_el1_reg_pre_load(void *opaque)
+{
+ GICv3CPUState *cs = opaque;
+
+ /*
+ * If the sre_el1 subsection is not transferred this
+ * means SRE_EL1 is 0x7 (which might not be the same as
+ * our reset value).
+ */
+ cs->icc_sre_el1 = 0x7;
+ return 0;
+}
+
+static bool icc_sre_el1_reg_needed(void *opaque)
+{
+ GICv3CPUState *cs = opaque;
+
+ return cs->icc_sre_el1 != 7;
+}
+
+const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
+ .name = "arm_gicv3_cpu/sre_el1",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_load = icc_sre_el1_reg_pre_load,
+ .needed = icc_sre_el1_reg_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_gicv3_cpu = {
.name = "arm_gicv3_cpu",
.version_id = 1,
@@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
.subsections = (const VMStateDescription * []) {
&vmstate_gicv3_cpu_virt,
NULL
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_gicv3_cpu_sre_el1,
+ NULL
}
};
@@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
s->cpu[i].cpu = cpu;
s->cpu[i].gic = s;
+ /* Store GICv3CPUState in CPUARMState gicv3state pointer */
+ gicv3_set_gicv3state(cpu, &s->cpu[i]);
/* Pre-construct the GICR_TYPER:
* For our implementation:
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index f775aba..0b20856 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -19,6 +19,14 @@
#include "gicv3_internal.h"
#include "cpu.h"
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+
+ env->gicv3state = (void *)s;
+};
+
static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
{
/* Given the CPU, find the right GICv3CPUState struct.
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index d69dc47..81f0403 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
#include "qapi/error.h"
#include "hw/intc/arm_gicv3_common.h"
#include "hw/sysbus.h"
+#include "qemu/error-report.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
+#include "gicv3_internal.h"
#include "vgic_common.h"
#include "migration/migration.h"
@@ -44,6 +46,32 @@
#define KVM_ARM_GICV3_GET_CLASS(obj) \
OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
+#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+ ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+ ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+ ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+ ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+ KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
typedef struct KVMARMGICv3Class {
ARMGICv3CommonClass parent_class;
DeviceRealize parent_realize;
@@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level)
kvm_arm_gic_set_irq(s->num_irq, irq, level);
}
+#define KVM_VGIC_ATTR(reg, typer) \
+ ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+ uint32_t *val, bool write)
+{
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ KVM_VGIC_ATTR(offset, 0),
+ val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+ uint32_t *val, bool write)
+{
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+ val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+ uint64_t *val, bool write)
+{
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+ val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+ (VGIC_LEVEL_INFO_LINE_LEVEL <<
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+ val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+ for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+ uint32_t reg, *field;
+ int irq;
+
+ field = (uint32_t *)bmp;
+ for_each_dist_irq_reg(irq, s->num_irq, 8) {
+ kvm_gicd_access(s, offset, &reg, false);
+ *field = reg;
+ offset += 4;
+ field++;
+ }
+}
+
+static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+ uint32_t reg, *field;
+ int irq;
+
+ field = (uint32_t *)bmp;
+ for_each_dist_irq_reg(irq, s->num_irq, 8) {
+ reg = *field;
+ kvm_gicd_access(s, offset, &reg, true);
+ offset += 4;
+ field++;
+ }
+}
+
+static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
+ uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 2) {
+ kvm_gicd_access(s, offset, &reg, false);
+ reg = half_unshuffle32(reg >> 1);
+ if (irq % 32 != 0) {
+ reg = (reg << 16);
+ }
+ *gic_bmp_ptr32(bmp, irq) |= reg;
+ offset += 4;
+ }
+}
+
+static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
+ uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 2) {
+ reg = *gic_bmp_ptr32(bmp, irq);
+ if (irq % 32 != 0) {
+ reg = (reg & 0xffff0000) >> 16;
+ } else {
+ reg = reg & 0xffff;
+ }
+ reg = half_shuffle32(reg) << 1;
+ kvm_gicd_access(s, offset, &reg, true);
+ offset += 4;
+ }
+}
+
+static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 1) {
+ kvm_gic_line_level_access(s, irq, 0, &reg, false);
+ *gic_bmp_ptr32(bmp, irq) = reg;
+ }
+}
+
+static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 1) {
+ reg = *gic_bmp_ptr32(bmp, irq);
+ kvm_gic_line_level_access(s, irq, 0, &reg, true);
+ }
+}
+
+/* Read a bitmap register group from the kernel VGIC. */
+static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 1) {
+ kvm_gicd_access(s, offset, &reg, false);
+ *gic_bmp_ptr32(bmp, irq) = reg;
+ offset += 4;
+ }
+}
+
+static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
+ uint32_t clroffset, uint32_t *bmp)
+{
+ uint32_t reg;
+ int irq;
+
+ for_each_dist_irq_reg(irq, s->num_irq, 1) {
+ /* If this bitmap is a set/clear register pair, first write to the
+ * clear-reg to clear all bits before using the set-reg to write
+ * the 1 bits.
+ */
+ if (clroffset != 0) {
+ reg = 0;
+ kvm_gicd_access(s, clroffset, &reg, true);
+ }
+ reg = *gic_bmp_ptr32(bmp, irq);
+ kvm_gicd_access(s, offset, &reg, true);
+ offset += 4;
+ }
+}
+
+static void kvm_arm_gicv3_check(GICv3State *s)
+{
+ uint32_t reg;
+ uint32_t num_irq;
+
+ /* Sanity checking s->num_irq */
+ kvm_gicd_access(s, GICD_TYPER, &reg, false);
+ num_irq = ((reg & 0x1f) + 1) * 32;
+
+ if (num_irq < s->num_irq) {
+ error_report("Model requests %u IRQs, but kernel supports max %u",
+ s->num_irq, num_irq);
+ abort();
+ }
+}
+
static void kvm_arm_gicv3_put(GICv3State *s)
{
- /* TODO */
- DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+ uint32_t regl, regh, reg;
+ uint64_t reg64, redist_typer;
+ int ncpu, i;
+
+ kvm_arm_gicv3_check(s);
+
+ kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+ kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+ redist_typer = ((uint64_t)regh << 32) | regl;
+
+ reg = s->gicd_ctlr;
+ kvm_gicd_access(s, GICD_CTLR, &reg, true);
+
+ if (redist_typer & GICR_TYPER_PLPIS) {
+ /* Set base addresses before LPIs are enabled by GICR_CTLR write */
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+
+ reg64 = c->gicr_propbaser;
+ regl = (uint32_t)reg64;
+ kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, true);
+ regh = (uint32_t)(reg64 >> 32);
+ kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, true);
+
+ reg64 = c->gicr_pendbaser;
+ if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
+ /* Setting PTZ is advised if LPIs are disabled, to reduce
+ * GIC initialization time.
+ */
+ reg64 |= GICR_PENDBASER_PTZ;
+ }
+ regl = (uint32_t)reg64;
+ kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, true);
+ regh = (uint32_t)(reg64 >> 32);
+ kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, true);
+ }
+ }
+
+ /* Redistributor state (one per CPU) */
+
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+
+ reg = c->gicr_ctlr;
+ kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, true);
+
+ reg = c->gicr_statusr[GICV3_NS];
+ kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, true);
+
+ reg = c->gicr_waker;
+ kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, true);
+
+ reg = c->gicr_igroupr0;
+ kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, true);
+
+ reg = ~0;
+ kvm_gicr_access(s, GICR_ICENABLER0, ncpu, &reg, true);
+ reg = c->gicr_ienabler0;
+ kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, true);
+
+ /* Restore config before pending so we treat level/edge correctly */
+ reg = half_shuffle32(c->edge_trigger >> 16) << 1;
+ kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, true);
+
+ reg = c->level;
+ kvm_gic_line_level_access(s, 0, ncpu, &reg, true);
+
+ reg = ~0;
+ kvm_gicr_access(s, GICR_ICPENDR0, ncpu, &reg, true);
+ reg = c->gicr_ipendr0;
+ kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, true);
+
+ reg = ~0;
+ kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, &reg, true);
+ reg = c->gicr_iactiver0;
+ kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, true);
+
+ for (i = 0; i < GIC_INTERNAL; i += 4) {
+ reg = c->gicr_ipriorityr[i] |
+ (c->gicr_ipriorityr[i + 1] << 8) |
+ (c->gicr_ipriorityr[i + 2] << 16) |
+ (c->gicr_ipriorityr[i + 3] << 24);
+ kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, true);
+ }
+ }
+
+ /* Distributor state (shared between all CPUs */
+ reg = s->gicd_statusr[GICV3_NS];
+ kvm_gicd_access(s, GICD_STATUSR, &reg, true);
+
+ /* s->enable bitmap -> GICD_ISENABLERn */
+ kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled);
+
+ /* s->group bitmap -> GICD_IGROUPRn */
+ kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group);
+
+ /* Restore targets before pending to ensure the pending state is set on
+ * the appropriate CPU interfaces in the kernel
+ */
+
+ /* s->gicd_irouter[irq] -> GICD_IROUTERn
+ * We can't use kvm_dist_put() here because the registers are 64-bit
+ */
+ for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+ uint32_t offset;
+
+ offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+ reg = (uint32_t)s->gicd_irouter[i];
+ kvm_gicd_access(s, offset, &reg, true);
+
+ offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+ reg = (uint32_t)(s->gicd_irouter[i] >> 32);
+ kvm_gicd_access(s, offset, &reg, true);
+ }
+
+ /* s->trigger bitmap -> GICD_ICFGRn
+ * (restore configuration registers before pending IRQs so we treat
+ * level/edge correctly)
+ */
+ kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+ /* s->level bitmap -> line_level */
+ kvm_gic_put_line_level_bmp(s, s->level);
+
+ /* s->pending bitmap -> GICD_ISPENDRn */
+ kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending);
+
+ /* s->active bitmap -> GICD_ISACTIVERn */
+ kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active);
+
+ /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */
+ kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+ /* CPU Interface state (one per CPU) */
+
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+ int num_pri_bits;
+
+ kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true);
+ kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+ &c->icc_ctlr_el1[GICV3_NS], true);
+ kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+ &c->icc_igrpen[GICV3_G0], true);
+ kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+ &c->icc_igrpen[GICV3_G1NS], true);
+ kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true);
+ kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true);
+ kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true);
+
+ num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+ ICC_CTLR_EL1_PRIBITS_MASK) >>
+ ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+ switch (num_pri_bits) {
+ case 7:
+ reg64 = c->icc_apr[GICV3_G0][3];
+ kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, true);
+ reg64 = c->icc_apr[GICV3_G0][2];
+ kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, true);
+ case 6:
+ reg64 = c->icc_apr[GICV3_G0][1];
+ kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, true);
+ default:
+ reg64 = c->icc_apr[GICV3_G0][0];
+ kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, true);
+ }
+
+ switch (num_pri_bits) {
+ case 7:
+ reg64 = c->icc_apr[GICV3_G1NS][3];
+ kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, true);
+ reg64 = c->icc_apr[GICV3_G1NS][2];
+ kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, true);
+ case 6:
+ reg64 = c->icc_apr[GICV3_G1NS][1];
+ kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, true);
+ default:
+ reg64 = c->icc_apr[GICV3_G1NS][0];
+ kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, true);
+ }
+ }
}
static void kvm_arm_gicv3_get(GICv3State *s)
{
- /* TODO */
- DPRINTF("Cannot get kernel gic state, no kernel interface\n");
+ uint32_t regl, regh, reg;
+ uint64_t reg64, redist_typer;
+ int ncpu, i;
+
+ kvm_arm_gicv3_check(s);
+
+ kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+ kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+ redist_typer = ((uint64_t)regh << 32) | regl;
+
+ kvm_gicd_access(s, GICD_CTLR, &reg, false);
+ s->gicd_ctlr = reg;
+
+ /* Redistributor state (one per CPU) */
+
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+
+ kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, false);
+ c->gicr_ctlr = reg;
+
+ kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, false);
+ c->gicr_statusr[GICV3_NS] = reg;
+
+ kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, false);
+ c->gicr_waker = reg;
+
+ kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, false);
+ c->gicr_igroupr0 = reg;
+ kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, false);
+ c->gicr_ienabler0 = reg;
+ kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, false);
+ c->edge_trigger = half_unshuffle32(reg >> 1) << 16;
+ kvm_gic_line_level_access(s, 0, ncpu, &reg, false);
+ c->level = reg;
+ kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, false);
+ c->gicr_ipendr0 = reg;
+ kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, false);
+ c->gicr_iactiver0 = reg;
+
+ for (i = 0; i < GIC_INTERNAL; i += 4) {
+ kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, false);
+ c->gicr_ipriorityr[i] = extract32(reg, 0, 8);
+ c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8);
+ c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8);
+ c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8);
+ }
+ }
+
+ if (redist_typer & GICR_TYPER_PLPIS) {
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+
+ kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, false);
+ kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, false);
+ c->gicr_propbaser = ((uint64_t)regh << 32) | regl;
+
+ kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, false);
+ kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, false);
+ c->gicr_pendbaser = ((uint64_t)regh << 32) | regl;
+ }
+ }
+
+ /* Distributor state (shared between all CPUs */
+
+ kvm_gicd_access(s, GICD_STATUSR, &reg, false);
+ s->gicd_statusr[GICV3_NS] = reg;
+
+ /* GICD_IGROUPRn -> s->group bitmap */
+ kvm_dist_getbmp(s, GICD_IGROUPR, s->group);
+
+ /* GICD_ISENABLERn -> s->enabled bitmap */
+ kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled);
+
+ /* Line level of irq */
+ kvm_gic_get_line_level_bmp(s, s->level);
+ /* GICD_ISPENDRn -> s->pending bitmap */
+ kvm_dist_getbmp(s, GICD_ISPENDR, s->pending);
+
+ /* GICD_ISACTIVERn -> s->active bitmap */
+ kvm_dist_getbmp(s, GICD_ISACTIVER, s->active);
+
+ /* GICD_ICFGRn -> s->trigger bitmap */
+ kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+ /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */
+ kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+ /* GICD_IROUTERn -> s->gicd_irouter[irq] */
+ for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+ uint32_t offset;
+
+ offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+ kvm_gicd_access(s, offset, &regl, false);
+ offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+ kvm_gicd_access(s, offset, &regh, false);
+ s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl;
+ }
+
+ /*****************************************************************
+ * CPU Interface(s) State
+ */
+
+ for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+ GICv3CPUState *c = &s->cpu[ncpu];
+ int num_pri_bits;
+
+ kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false);
+ kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+ &c->icc_ctlr_el1[GICV3_NS], false);
+ kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+ &c->icc_igrpen[GICV3_G0], false);
+ kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+ &c->icc_igrpen[GICV3_G1NS], false);
+ kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false);
+ kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false);
+ kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false);
+ num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+ ICC_CTLR_EL1_PRIBITS_MASK) >>
+ ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+ switch (num_pri_bits) {
+ case 7:
+ kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G0][3] = reg64;
+ kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G0][2] = reg64;
+ case 6:
+ kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G0][1] = reg64;
+ default:
+ kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G0][0] = reg64;
+ }
+
+ switch (num_pri_bits) {
+ case 7:
+ kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G1NS][3] = reg64;
+ kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G1NS][2] = reg64;
+ case 6:
+ kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G1NS][1] = reg64;
+ default:
+ kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, false);
+ c->icc_apr[GICV3_G1NS][0] = reg64;
+ }
+ }
+}
+
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ ARMCPU *cpu;
+ GICv3State *s;
+ GICv3CPUState *c;
+
+ c = (GICv3CPUState *)env->gicv3state;
+ s = c->gic;
+ cpu = ARM_CPU(c->cpu);
+
+ /* Initialize to actual HW supported configuration */
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+ &c->icc_ctlr_el1[GICV3_NS], false);
+
+ c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+ c->icc_pmr_el1 = 0;
+ c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+ c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+ c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+ c->icc_sre_el1 = 0x7;
+ memset(c->icc_apr, 0, sizeof(c->icc_apr));
+ memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
}
static void kvm_arm_gicv3_reset(DeviceState *dev)
@@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
DPRINTF("Reset\n");
kgc->parent_reset(dev);
+
+ if (s->migration_blocker) {
+ DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+ return;
+ }
+
kvm_arm_gicv3_put(s);
}
+/*
+ * CPU interface registers of GIC needs to be reset on CPU reset.
+ * For the calling arm_gicv3_icc_reset() on CPU reset, we register
+ * below ARMCPRegInfo. As we reset the whole cpu interface under single
+ * register reset, we define only one register of CPU interface instead
+ * of defining all the registers.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+ { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+ /*
+ * If ARM_CP_NOP is used, resetfn is not called,
+ * So ARM_CP_NO_RAW is appropriate type.
+ */
+ .type = ARM_CP_NO_RAW,
+ .access = PL1_RW,
+ .readfn = arm_cp_read_zero,
+ .writefn = arm_cp_write_ignore,
+ /*
+ * We hang the whole cpu interface reset routine off here
+ * rather than parcelling it out into one little function
+ * per register
+ */
+ .resetfn = arm_gicv3_icc_reset,
+ },
+ REGINFO_SENTINEL
+};
+
static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
{
GICv3State *s = KVM_ARM_GICV3(dev);
@@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
- /* Block migration of a KVM GICv3 device: the API for saving and restoring
- * the state in the kernel is not yet finalised in the kernel or
- * implemented in QEMU.
- */
- error_setg(&s->migration_blocker, "vGICv3 migration is not implemented");
- migrate_add_blocker(s->migration_blocker, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- error_free(s->migration_blocker);
- return;
+ for (i = 0; i < s->num_cpu; i++) {
+ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+
+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
}
/* Try to create the device via the device control API */
@@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
kvm_irqchip_commit_routes(kvm_state);
}
+
+ if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_CTLR)) {
+ error_setg(&s->migration_blocker, "This operating system kernel does "
+ "not support vGICv3 migration");
+ migrate_add_blocker(s->migration_blocker, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ error_free(s->migration_blocker);
+ return;
+ }
+ }
}
static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 76097b4..32ffa0b 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -17,8 +17,8 @@
#include "hw/sysbus.h"
#include "qemu/timer.h"
#include "hw/arm/arm.h"
+#include "hw/arm/armv7m_nvic.h"
#include "target/arm/cpu.h"
-#include "exec/address-spaces.h"
#include "qemu/log.h"
#include "trace.h"
@@ -47,7 +47,6 @@
* "exception" more or less interchangeably.
*/
#define NVIC_FIRST_IRQ 16
-#define NVIC_MAX_VECTORS 512
#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ)
/* Effective running priority of the CPU when no exception is active
@@ -55,116 +54,10 @@
*/
#define NVIC_NOEXC_PRIO 0x100
-typedef struct VecInfo {
- /* Exception priorities can range from -3 to 255; only the unmodifiable
- * priority values for RESET, NMI and HardFault can be negative.
- */
- int16_t prio;
- uint8_t enabled;
- uint8_t pending;
- uint8_t active;
- uint8_t level; /* exceptions <=15 never set level */
-} VecInfo;
-
-typedef struct NVICState {
- /*< private >*/
- SysBusDevice parent_obj;
- /*< public >*/
-
- ARMCPU *cpu;
-
- VecInfo vectors[NVIC_MAX_VECTORS];
- uint32_t prigroup;
-
- /* vectpending and exception_prio are both cached state that can
- * be recalculated from the vectors[] array and the prigroup field.
- */
- unsigned int vectpending; /* highest prio pending enabled exception */
- int exception_prio; /* group prio of the highest prio active exception */
-
- struct {
- uint32_t control;
- uint32_t reload;
- int64_t tick;
- QEMUTimer *timer;
- } systick;
-
- MemoryRegion sysregmem;
- MemoryRegion container;
-
- uint32_t num_irq;
- qemu_irq excpout;
- qemu_irq sysresetreq;
-} NVICState;
-
-#define TYPE_NVIC "armv7m_nvic"
-
-#define NVIC(obj) \
- OBJECT_CHECK(NVICState, (obj), TYPE_NVIC)
-
static const uint8_t nvic_id[] = {
0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1
};
-/* qemu timers run at 1GHz. We want something closer to 1MHz. */
-#define SYSTICK_SCALE 1000ULL
-
-#define SYSTICK_ENABLE (1 << 0)
-#define SYSTICK_TICKINT (1 << 1)
-#define SYSTICK_CLKSOURCE (1 << 2)
-#define SYSTICK_COUNTFLAG (1 << 16)
-
-int system_clock_scale;
-
-/* Conversion factor from qemu timer to SysTick frequencies. */
-static inline int64_t systick_scale(NVICState *s)
-{
- if (s->systick.control & SYSTICK_CLKSOURCE)
- return system_clock_scale;
- else
- return 1000;
-}
-
-static void systick_reload(NVICState *s, int reset)
-{
- /* The Cortex-M3 Devices Generic User Guide says that "When the
- * ENABLE bit is set to 1, the counter loads the RELOAD value from the
- * SYST RVR register and then counts down". So, we need to check the
- * ENABLE bit before reloading the value.
- */
- if ((s->systick.control & SYSTICK_ENABLE) == 0) {
- return;
- }
-
- if (reset)
- s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- s->systick.tick += (s->systick.reload + 1) * systick_scale(s);
- timer_mod(s->systick.timer, s->systick.tick);
-}
-
-static void systick_timer_tick(void * opaque)
-{
- NVICState *s = (NVICState *)opaque;
- s->systick.control |= SYSTICK_COUNTFLAG;
- if (s->systick.control & SYSTICK_TICKINT) {
- /* Trigger the interrupt. */
- armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
- }
- if (s->systick.reload == 0) {
- s->systick.control &= ~SYSTICK_ENABLE;
- } else {
- systick_reload(s, 0);
- }
-}
-
-static void systick_reset(NVICState *s)
-{
- s->systick.control = 0;
- s->systick.reload = 0;
- s->systick.tick = 0;
- timer_del(s->systick.timer);
-}
-
static int nvic_pending_prio(NVICState *s)
{
/* return the priority of the current pending interrupt,
@@ -510,30 +403,6 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset)
switch (offset) {
case 4: /* Interrupt Control Type. */
return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1;
- case 0x10: /* SysTick Control and Status. */
- val = s->systick.control;
- s->systick.control &= ~SYSTICK_COUNTFLAG;
- return val;
- case 0x14: /* SysTick Reload Value. */
- return s->systick.reload;
- case 0x18: /* SysTick Current Value. */
- {
- int64_t t;
- if ((s->systick.control & SYSTICK_ENABLE) == 0)
- return 0;
- t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (t >= s->systick.tick)
- return 0;
- val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1;
- /* The interrupt in triggered when the timer reaches zero.
- However the counter is not reloaded until the next clock
- tick. This is a hack to return zero during the first tick. */
- if (val > s->systick.reload)
- val = 0;
- return val;
- }
- case 0x1c: /* SysTick Calibration Value. */
- return 10000;
case 0xd00: /* CPUID Base. */
return cpu->midr;
case 0xd04: /* Interrupt Control State. */
@@ -668,40 +537,8 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset)
static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value)
{
ARMCPU *cpu = s->cpu;
- uint32_t oldval;
+
switch (offset) {
- case 0x10: /* SysTick Control and Status. */
- oldval = s->systick.control;
- s->systick.control &= 0xfffffff8;
- s->systick.control |= value & 7;
- if ((oldval ^ value) & SYSTICK_ENABLE) {
- int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (value & SYSTICK_ENABLE) {
- if (s->systick.tick) {
- s->systick.tick += now;
- timer_mod(s->systick.timer, s->systick.tick);
- } else {
- systick_reload(s, 1);
- }
- } else {
- timer_del(s->systick.timer);
- s->systick.tick -= now;
- if (s->systick.tick < 0)
- s->systick.tick = 0;
- }
- } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
- /* This is a hack. Force the timer to be reloaded
- when the reference clock is changed. */
- systick_reload(s, 1);
- }
- break;
- case 0x14: /* SysTick Reload Value. */
- s->systick.reload = value;
- break;
- case 0x18: /* SysTick Current Value. Writes reload the timer. */
- systick_reload(s, 1);
- s->systick.control &= ~SYSTICK_COUNTFLAG;
- break;
case 0xd04: /* Interrupt Control State. */
if (value & (1 << 31)) {
armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI);
@@ -1000,16 +837,12 @@ static const VMStateDescription vmstate_VecInfo = {
static const VMStateDescription vmstate_nvic = {
.name = "armv7m_nvic",
- .version_id = 3,
- .minimum_version_id = 3,
+ .version_id = 4,
+ .minimum_version_id = 4,
.post_load = &nvic_post_load,
.fields = (VMStateField[]) {
VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1,
vmstate_VecInfo, VecInfo),
- VMSTATE_UINT32(systick.control, NVICState),
- VMSTATE_UINT32(systick.reload, NVICState),
- VMSTATE_INT64(systick.tick, NVICState),
- VMSTATE_TIMER_PTR(systick.timer, NVICState),
VMSTATE_UINT32(prigroup, NVICState),
VMSTATE_END_OF_LIST()
}
@@ -1047,13 +880,26 @@ static void armv7m_nvic_reset(DeviceState *dev)
s->exception_prio = NVIC_NOEXC_PRIO;
s->vectpending = 0;
+}
- systick_reset(s);
+static void nvic_systick_trigger(void *opaque, int n, int level)
+{
+ NVICState *s = opaque;
+
+ if (level) {
+ /* SysTick just asked us to pend its exception.
+ * (This is different from an external interrupt line's
+ * behaviour.)
+ */
+ armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
+ }
}
static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
{
NVICState *s = NVIC(dev);
+ SysBusDevice *systick_sbd;
+ Error *err = NULL;
s->cpu = ARM_CPU(qemu_get_cpu(0));
assert(s->cpu);
@@ -1068,10 +914,19 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
/* include space for internal exception vectors */
s->num_irq += NVIC_FIRST_IRQ;
+ object_property_set_bool(OBJECT(&s->systick), true, "realized", &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+ systick_sbd = SYS_BUS_DEVICE(&s->systick);
+ sysbus_connect_irq(systick_sbd, 0,
+ qdev_get_gpio_in_named(dev, "systick-trigger", 0));
+
/* The NVIC and System Control Space (SCS) starts at 0xe000e000
* and looks like this:
* 0x004 - ICTR
- * 0x010 - 0x1c - systick
+ * 0x010 - 0xff - systick
* 0x100..0x7ec - NVIC
* 0x7f0..0xcff - Reserved
* 0xd00..0xd3c - SCS registers
@@ -1089,12 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s,
"nvic_sysregs", 0x1000);
memory_region_add_subregion(&s->container, 0, &s->sysregmem);
+ memory_region_add_subregion_overlap(&s->container, 0x10,
+ sysbus_mmio_get_region(systick_sbd, 0),
+ 1);
- /* Map the whole thing into system memory at the location required
- * by the v7M architecture.
- */
- memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container);
- s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container);
}
static void armv7m_nvic_instance_init(Object *obj)
@@ -1109,8 +963,12 @@ static void armv7m_nvic_instance_init(Object *obj)
NVICState *nvic = NVIC(obj);
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+ object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK);
+ qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default());
+
sysbus_init_irq(sbd, &nvic->excpout);
qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1);
+ qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1);
}
static void armv7m_nvic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index aeb801d..05303a5 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -138,6 +138,7 @@
#define ICC_CTLR_EL1_EOIMODE (1U << 1)
#define ICC_CTLR_EL1_PMHE (1U << 6)
#define ICC_CTLR_EL1_PRIBITS_SHIFT 8
+#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT)
#define ICC_CTLR_EL1_IDBITS_SHIFT 11
#define ICC_CTLR_EL1_SEIS (1U << 14)
#define ICC_CTLR_EL1_A3V (1U << 15)
@@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s)
}
}
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
+
#endif /* QEMU_ARM_GICV3_INTERNAL_H */
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 095c16a..ffc0747 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
return -1;
}
-void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
- ICPState *ss = &xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(xi, cs->cpu_index);
- assert(cs->cpu_index < xics->nr_servers);
- assert(cs == ss->cs);
+ assert(icp);
+ assert(cs == icp->cs);
- ss->output = NULL;
- ss->cs = NULL;
+ icp->output = NULL;
+ icp->cs = NULL;
}
-void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- ICPState *ss = &xics->ss[cs->cpu_index];
- XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
+ ICPState *icp = xics_icp_get(xi, cs->cpu_index);
+ ICPStateClass *icpc;
- assert(cs->cpu_index < xics->nr_servers);
+ assert(icp);
- ss->cs = cs;
+ icp->cs = cs;
- if (info->cpu_setup) {
- info->cpu_setup(xics, cpu);
+ icpc = ICP_GET_CLASS(icp);
+ if (icpc->cpu_setup) {
+ icpc->cpu_setup(icp, cpu);
}
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_POWER7:
- ss->output = env->irq_inputs[POWER7_INPUT_INT];
+ icp->output = env->irq_inputs[POWER7_INPUT_INT];
break;
case PPC_FLAGS_INPUT_970:
- ss->output = env->irq_inputs[PPC970_INPUT_INT];
+ icp->output = env->irq_inputs[PPC970_INPUT_INT];
break;
default:
@@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
}
}
-static void xics_common_pic_print_info(InterruptStatsProvider *obj,
- Monitor *mon)
+void icp_pic_print_info(ICPState *icp, Monitor *mon)
{
- XICSState *xics = XICS_COMMON(obj);
- ICSState *ics;
- uint32_t i;
-
- for (i = 0; i < xics->nr_servers; i++) {
- ICPState *icp = &xics->ss[i];
-
- if (!icp->output) {
- continue;
- }
- monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
- i, icp->xirr, icp->xirr_owner,
- icp->pending_priority, icp->mfrr);
- }
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- monitor_printf(mon, "ICS %4x..%4x %p\n",
- ics->offset, ics->offset + ics->nr_irqs - 1, ics);
-
- if (!ics->irqs) {
- continue;
- }
-
- for (i = 0; i < ics->nr_irqs; i++) {
- ICSIRQState *irq = ics->irqs + i;
-
- if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
- continue;
- }
- monitor_printf(mon, " %4x %s %02x %02x\n",
- ics->offset + i,
- (irq->flags & XICS_FLAGS_IRQ_LSI) ?
- "LSI" : "MSI",
- irq->priority, irq->status);
- }
- }
-}
-
-/*
- * XICS Common class - parent for emulated XICS and KVM-XICS
- */
-static void xics_common_reset(DeviceState *d)
-{
- XICSState *xics = XICS_COMMON(d);
- ICSState *ics;
- int i;
-
- for (i = 0; i < xics->nr_servers; i++) {
- device_reset(DEVICE(&xics->ss[i]));
- }
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- device_reset(DEVICE(ics));
- }
-}
-
-static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- int64_t value = xics->nr_irqs;
+ int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
- visit_type_int(v, name, &value, errp);
-}
-
-static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
- Error *error = NULL;
- int64_t value;
-
- visit_type_int(v, name, &value, &error);
- if (error) {
- error_propagate(errp, error);
+ if (!icp->output) {
return;
}
- if (xics->nr_irqs) {
- error_setg(errp, "Number of interrupts is already set to %u",
- xics->nr_irqs);
- return;
- }
-
- assert(info->set_nr_irqs);
- info->set_nr_irqs(xics, value, errp);
-}
-
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- const char *typename, Error **errp)
-{
- int i;
-
- xics->nr_servers = nr_servers;
-
- xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
- for (i = 0; i < xics->nr_servers; i++) {
- char name[32];
- ICPState *icp = &xics->ss[i];
-
- object_initialize(icp, sizeof(*icp), typename);
- snprintf(name, sizeof(name), "icp[%d]", i);
- object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
- icp->xics = xics;
- }
+ monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+ cpu_index, icp->xirr, icp->xirr_owner,
+ icp->pending_priority, icp->mfrr);
}
-static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
- const char *name, void *opaque,
- Error **errp)
+void ics_pic_print_info(ICSState *ics, Monitor *mon)
{
- XICSState *xics = XICS_COMMON(obj);
- int64_t value = xics->nr_servers;
-
- visit_type_int(v, name, &value, errp);
-}
+ uint32_t i;
-static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
- const char *name, void *opaque,
- Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
- Error *error = NULL;
- int64_t value;
+ monitor_printf(mon, "ICS %4x..%4x %p\n",
+ ics->offset, ics->offset + ics->nr_irqs - 1, ics);
- visit_type_int(v, name, &value, &error);
- if (error) {
- error_propagate(errp, error);
+ if (!ics->irqs) {
return;
}
- if (xics->nr_servers) {
- error_setg(errp, "Number of servers is already set to %u",
- xics->nr_servers);
- return;
- }
-
- assert(xsc->set_nr_servers);
- xsc->set_nr_servers(xics, value, errp);
-}
-
-static void xics_common_initfn(Object *obj)
-{
- XICSState *xics = XICS_COMMON(obj);
- QLIST_INIT(&xics->ics);
- object_property_add(obj, "nr_irqs", "int",
- xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
- NULL, NULL, NULL);
- object_property_add(obj, "nr_servers", "int",
- xics_prop_get_nr_servers, xics_prop_set_nr_servers,
- NULL, NULL, NULL);
-}
-
-static void xics_common_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+ for (i = 0; i < ics->nr_irqs; i++) {
+ ICSIRQState *irq = ics->irqs + i;
- dc->reset = xics_common_reset;
- ic->print_info = xics_common_pic_print_info;
+ if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+ continue;
+ }
+ monitor_printf(mon, " %4x %s %02x %02x\n",
+ ics->offset + i,
+ (irq->flags & XICS_FLAGS_IRQ_LSI) ?
+ "LSI" : "MSI",
+ irq->priority, irq->status);
+ }
}
-static const TypeInfo xics_common_info = {
- .name = TYPE_XICS_COMMON,
- .parent = TYPE_SYS_BUS_DEVICE,
- .instance_size = sizeof(XICSState),
- .class_size = sizeof(XICSStateClass),
- .instance_init = xics_common_initfn,
- .class_init = xics_common_class_init,
- .interfaces = (InterfaceInfo[]) {
- { TYPE_INTERRUPT_STATS_PROVIDER },
- { }
- },
-};
-
/*
* ICP: Presentation layer
*/
@@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = {
#define XISR_MASK 0x00ffffff
#define CPPR_MASK 0xff000000
-#define XISR(ss) (((ss)->xirr) & XISR_MASK)
-#define CPPR(ss) (((ss)->xirr) >> 24)
+#define XISR(icp) (((icp)->xirr) & XISR_MASK)
+#define CPPR(icp) (((icp)->xirr) >> 24)
static void ics_reject(ICSState *ics, uint32_t nr)
{
@@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr)
}
}
-static void ics_resend(ICSState *ics)
+void ics_resend(ICSState *ics)
{
ICSStateClass *k = ICS_BASE_GET_CLASS(ics);
@@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr)
}
}
-static void icp_check_ipi(ICPState *ss)
+static void icp_check_ipi(ICPState *icp)
{
- if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+ if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) {
return;
}
- trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
+ trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr);
- if (XISR(ss) && ss->xirr_owner) {
- ics_reject(ss->xirr_owner, XISR(ss));
+ if (XISR(icp) && icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, XISR(icp));
}
- ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
- ss->pending_priority = ss->mfrr;
- ss->xirr_owner = NULL;
- qemu_irq_raise(ss->output);
+ icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI;
+ icp->pending_priority = icp->mfrr;
+ icp->xirr_owner = NULL;
+ qemu_irq_raise(icp->output);
}
-static void icp_resend(ICPState *ss)
+void icp_resend(ICPState *icp)
{
- ICSState *ics;
+ XICSFabric *xi = icp->xics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
- if (ss->mfrr < CPPR(ss)) {
- icp_check_ipi(ss);
- }
- QLIST_FOREACH(ics, &ss->xics->ics, list) {
- ics_resend(ics);
+ if (icp->mfrr < CPPR(icp)) {
+ icp_check_ipi(icp);
}
+
+ xic->ics_resend(xi);
}
-void icp_set_cppr(ICPState *ss, uint8_t cppr)
+void icp_set_cppr(ICPState *icp, uint8_t cppr)
{
uint8_t old_cppr;
uint32_t old_xisr;
- old_cppr = CPPR(ss);
- ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+ old_cppr = CPPR(icp);
+ icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24);
if (cppr < old_cppr) {
- if (XISR(ss) && (cppr <= ss->pending_priority)) {
- old_xisr = XISR(ss);
- ss->xirr &= ~XISR_MASK; /* Clear XISR */
- ss->pending_priority = 0xff;
- qemu_irq_lower(ss->output);
- if (ss->xirr_owner) {
- ics_reject(ss->xirr_owner, old_xisr);
- ss->xirr_owner = NULL;
+ if (XISR(icp) && (cppr <= icp->pending_priority)) {
+ old_xisr = XISR(icp);
+ icp->xirr &= ~XISR_MASK; /* Clear XISR */
+ icp->pending_priority = 0xff;
+ qemu_irq_lower(icp->output);
+ if (icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, old_xisr);
+ icp->xirr_owner = NULL;
}
}
} else {
- if (!XISR(ss)) {
- icp_resend(ss);
+ if (!XISR(icp)) {
+ icp_resend(icp);
}
}
}
-void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
+void icp_set_mfrr(ICPState *icp, uint8_t mfrr)
{
- ss->mfrr = mfrr;
- if (mfrr < CPPR(ss)) {
- icp_check_ipi(ss);
+ icp->mfrr = mfrr;
+ if (mfrr < CPPR(icp)) {
+ icp_check_ipi(icp);
}
}
-uint32_t icp_accept(ICPState *ss)
+uint32_t icp_accept(ICPState *icp)
{
- uint32_t xirr = ss->xirr;
+ uint32_t xirr = icp->xirr;
- qemu_irq_lower(ss->output);
- ss->xirr = ss->pending_priority << 24;
- ss->pending_priority = 0xff;
- ss->xirr_owner = NULL;
+ qemu_irq_lower(icp->output);
+ icp->xirr = icp->pending_priority << 24;
+ icp->pending_priority = 0xff;
+ icp->xirr_owner = NULL;
- trace_xics_icp_accept(xirr, ss->xirr);
+ trace_xics_icp_accept(xirr, icp->xirr);
return xirr;
}
-uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
+uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr)
{
if (mfrr) {
- *mfrr = ss->mfrr;
+ *mfrr = icp->mfrr;
}
- return ss->xirr;
+ return icp->xirr;
}
-void icp_eoi(ICPState *ss, uint32_t xirr)
+void icp_eoi(ICPState *icp, uint32_t xirr)
{
+ XICSFabric *xi = icp->xics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
ICSState *ics;
uint32_t irq;
/* Send EOI -> ICS */
- ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
- trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
+ icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+ trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr);
irq = xirr & XISR_MASK;
- QLIST_FOREACH(ics, &ss->xics->ics, list) {
- if (ics_valid_irq(ics, irq)) {
- ics_eoi(ics, irq);
- }
+
+ ics = xic->ics_get(xi, irq);
+ if (ics) {
+ ics_eoi(ics, irq);
}
- if (!XISR(ss)) {
- icp_resend(ss);
+ if (!XISR(icp)) {
+ icp_resend(icp);
}
}
static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
{
- XICSState *xics = ics->xics;
- ICPState *ss = xics->ss + server;
+ ICPState *icp = xics_icp_get(ics->xics, server);
trace_xics_icp_irq(server, nr, priority);
- if ((priority >= CPPR(ss))
- || (XISR(ss) && (ss->pending_priority <= priority))) {
+ if ((priority >= CPPR(icp))
+ || (XISR(icp) && (icp->pending_priority <= priority))) {
ics_reject(ics, nr);
} else {
- if (XISR(ss) && ss->xirr_owner) {
- ics_reject(ss->xirr_owner, XISR(ss));
- ss->xirr_owner = NULL;
+ if (XISR(icp) && icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, XISR(icp));
+ icp->xirr_owner = NULL;
}
- ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
- ss->xirr_owner = ics;
- ss->pending_priority = priority;
- trace_xics_icp_raise(ss->xirr, ss->pending_priority);
- qemu_irq_raise(ss->output);
+ icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+ icp->xirr_owner = ics;
+ icp->pending_priority = priority;
+ trace_xics_icp_raise(icp->xirr, icp->pending_priority);
+ qemu_irq_raise(icp->output);
}
}
static void icp_dispatch_pre_save(void *opaque)
{
- ICPState *ss = opaque;
- ICPStateClass *info = ICP_GET_CLASS(ss);
+ ICPState *icp = opaque;
+ ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->pre_save) {
- info->pre_save(ss);
+ info->pre_save(icp);
}
}
static int icp_dispatch_post_load(void *opaque, int version_id)
{
- ICPState *ss = opaque;
- ICPStateClass *info = ICP_GET_CLASS(ss);
+ ICPState *icp = opaque;
+ ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->post_load) {
- return info->post_load(ss, version_id);
+ return info->post_load(icp, version_id);
}
return 0;
@@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev)
qemu_set_irq(icp->output, 0);
}
+static void icp_realize(DeviceState *dev, Error **errp)
+{
+ ICPState *icp = ICP(dev);
+ Object *obj;
+ Error *err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xics", &err);
+ if (!obj) {
+ error_setg(errp, "%s: required link 'xics' not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+
+ icp->xics = XICS_FABRIC(obj);
+}
+
+
static void icp_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->reset = icp_reset;
dc->vmsd = &vmstate_icp_server;
+ dc->realize = icp_realize;
}
static const TypeInfo icp_info = {
@@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev)
}
}
-static int ics_simple_post_load(ICSState *ics, int version_id)
-{
- int i;
-
- for (i = 0; i < ics->xics->nr_servers; i++) {
- icp_resend(&ics->xics->ss[i]);
- }
-
- return 0;
-}
-
static void ics_simple_dispatch_pre_save(void *opaque)
{
ICSState *ics = opaque;
@@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp)
ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
}
+static Property ics_simple_properties[] = {
+ DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void ics_simple_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *isc = ICS_BASE_CLASS(klass);
- dc->realize = ics_simple_realize;
+ isc->realize = ics_simple_realize;
+ dc->props = ics_simple_properties;
dc->vmsd = &vmstate_ics_simple;
dc->reset = ics_simple_reset;
- isc->post_load = ics_simple_post_load;
isc->reject = ics_simple_reject;
isc->resend = ics_simple_resend;
isc->eoi = ics_simple_eoi;
@@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = {
.instance_init = ics_simple_initfn,
};
+static void ics_base_realize(DeviceState *dev, Error **errp)
+{
+ ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
+ ICSState *ics = ICS_BASE(dev);
+ Object *obj;
+ Error *err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xics", &err);
+ if (!obj) {
+ error_setg(errp, "%s: required link 'xics' not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+ ics->xics = XICS_FABRIC(obj);
+
+
+ if (icsc->realize) {
+ icsc->realize(dev, errp);
+ }
+}
+
+static void ics_base_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = ics_base_realize;
+}
+
static const TypeInfo ics_base_info = {
.name = TYPE_ICS_BASE,
.parent = TYPE_DEVICE,
.abstract = true,
.instance_size = sizeof(ICSState),
+ .class_init = ics_base_class_init,
.class_size = sizeof(ICSStateClass),
};
+static const TypeInfo xics_fabric_info = {
+ .name = TYPE_XICS_FABRIC,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(XICSFabricClass),
+};
+
/*
* Exported functions
*/
-ICSState *xics_find_source(XICSState *xics, int irq)
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq)
{
- ICSState *ics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
+ ICSState *ics = xic->ics_get(xi, irq);
- QLIST_FOREACH(ics, &xics->ics, list) {
- if (ics_valid_irq(ics, irq)) {
- return ics;
- }
+ if (ics) {
+ return ics->qirqs[irq - ics->offset];
}
+
return NULL;
}
-qemu_irq xics_get_qirq(XICSState *xics, int irq)
+ICPState *xics_icp_get(XICSFabric *xi, int server)
{
- ICSState *ics = xics_find_source(xics, irq);
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
- if (ics) {
- return ics->qirqs[irq - ics->offset];
- }
-
- return NULL;
+ return xic->icp_get(xi, server);
}
void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
@@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
static void xics_register_types(void)
{
- type_register_static(&xics_common_info);
type_register_static(&ics_simple_info);
type_register_static(&ics_base_info);
type_register_static(&icp_info);
+ type_register_static(&xics_fabric_info);
}
type_init(xics_register_types)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 17694ea..0a3daca 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -40,16 +40,12 @@
#include <sys/ioctl.h>
-typedef struct KVMXICSState {
- XICSState parent_obj;
-
- int kernel_xics_fd;
-} KVMXICSState;
+static int kernel_xics_fd = -1;
/*
* ICP-KVM
*/
-static void icp_get_kvm_state(ICPState *ss)
+static void icp_get_kvm_state(ICPState *icp)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss)
int ret;
/* ICP for this CPU thread is not in use, exiting */
- if (!ss->cs) {
+ if (!icp->cs) {
return;
}
- ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
+ ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
- " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno));
+ " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
exit(1);
}
- ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
- ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
+ icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
+ icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
& KVM_REG_PPC_ICP_MFRR_MASK;
- ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
+ icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
& KVM_REG_PPC_ICP_PPRI_MASK;
}
-static int icp_set_kvm_state(ICPState *ss, int version_id)
+static int icp_set_kvm_state(ICPState *icp, int version_id)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id)
int ret;
/* ICP for this CPU thread is not in use, exiting */
- if (!ss->cs) {
+ if (!icp->cs) {
return 0;
}
- state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
- | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
- | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
+ state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
+ | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
+ | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
- ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
+ ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state (0x%"
- PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs),
+ PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs),
strerror(errno));
return ret;
}
@@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev)
icp_set_kvm_state(icp, 1);
}
+static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ int ret;
+
+ if (kernel_xics_fd == -1) {
+ abort();
+ }
+
+ /*
+ * If we are reusing a parked vCPU fd corresponding to the CPU
+ * which was hot-removed earlier we don't have to renable
+ * KVM_CAP_IRQ_XICS capability again.
+ */
+ if (icp->cap_irq_xics_enabled) {
+ return;
+ }
+
+ ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
+ kvm_arch_vcpu_id(cs));
+ if (ret < 0) {
+ error_report("Unable to connect CPU%ld to kernel XICS: %s",
+ kvm_arch_vcpu_id(cs), strerror(errno));
+ exit(1);
+ }
+ icp->cap_irq_xics_enabled = true;
+}
+
static void icp_kvm_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data)
dc->reset = icp_kvm_reset;
icpc->pre_save = icp_get_kvm_state;
icpc->post_load = icp_set_kvm_state;
+ icpc->cpu_setup = icp_kvm_cpu_setup;
}
static const TypeInfo icp_kvm_info = {
@@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = {
*/
static void ics_get_kvm_state(ICSState *ics)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics)
attr.attr = i + ics->offset;
- ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+ ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
" for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics)
static int ics_set_kvm_state(ICSState *ics, int version_id)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
}
}
- ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
+ ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state"
" for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *icsc = ICS_BASE_CLASS(klass);
- dc->realize = ics_kvm_realize;
+ icsc->realize = ics_kvm_realize;
dc->reset = ics_kvm_reset;
icsc->pre_save = ics_get_kvm_state;
icsc->post_load = ics_set_kvm_state;
@@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = {
/*
* XICS-KVM
*/
-static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
-{
- CPUState *cs;
- ICPState *ss;
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics);
- int ret;
-
- cs = CPU(cpu);
- ss = &xics->ss[cs->cpu_index];
-
- assert(cs->cpu_index < xics->nr_servers);
- if (xicskvm->kernel_xics_fd == -1) {
- abort();
- }
-
- /*
- * If we are reusing a parked vCPU fd corresponding to the CPU
- * which was hot-removed earlier we don't have to renable
- * KVM_CAP_IRQ_XICS capability again.
- */
- if (ss->cap_irq_xics_enabled) {
- return;
- }
-
- ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd,
- kvm_arch_vcpu_id(cs));
- if (ret < 0) {
- error_report("Unable to connect CPU%ld to kernel XICS: %s",
- kvm_arch_vcpu_id(cs), strerror(errno));
- exit(1);
- }
- ss->cap_irq_xics_enabled = true;
-}
-
-static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
- Error **errp)
-{
- ICSState *ics = QLIST_FIRST(&xics->ics);
-
- /* This needs to be deprecated ... */
- xics->nr_irqs = nr_irqs;
- if (ics) {
- ics->nr_irqs = nr_irqs;
- }
-}
-
-static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- Error **errp)
-{
- xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
-}
static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
@@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
__func__);
}
-static void xics_kvm_realize(DeviceState *dev, Error **errp)
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev);
- XICSState *xics = XICS_COMMON(dev);
- ICSState *ics;
- int i, rc;
- Error *error = NULL;
+ int rc;
struct kvm_create_device xics_create_device = {
.type = KVM_DEV_TYPE_XICS,
.flags = 0,
@@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
goto fail;
}
- xicskvm->kernel_xics_fd = xics_create_device.fd;
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- object_property_set_bool(OBJECT(ics), true, "realized", &error);
- if (error) {
- error_propagate(errp, error);
- goto fail;
- }
- }
-
- assert(xics->nr_servers);
- for (i = 0; i < xics->nr_servers; i++) {
- object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
- &error);
- if (error) {
- error_propagate(errp, error);
- goto fail;
- }
- }
+ kernel_xics_fd = xics_create_device.fd;
kvm_kernel_irqchip = true;
kvm_msi_via_irqfd_allowed = true;
kvm_gsi_direct_mapping = true;
- return;
+ return rc;
fail:
kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+ return -1;
}
-static void xics_kvm_initfn(Object *obj)
-{
- XICSState *xics = XICS_COMMON(obj);
- ICSState *ics;
-
- ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM));
- object_property_add_child(obj, "ics", OBJECT(ics), NULL);
- ics->xics = xics;
- QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_kvm_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- XICSStateClass *xsc = XICS_COMMON_CLASS(oc);
-
- dc->realize = xics_kvm_realize;
- xsc->cpu_setup = xics_kvm_cpu_setup;
- xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
- xsc->set_nr_servers = xics_kvm_set_nr_servers;
-}
-
-static const TypeInfo xics_spapr_kvm_info = {
- .name = TYPE_XICS_SPAPR_KVM,
- .parent = TYPE_XICS_COMMON,
- .instance_size = sizeof(KVMXICSState),
- .class_init = xics_kvm_class_init,
- .instance_init = xics_kvm_initfn,
-};
-
static void xics_kvm_register_types(void)
{
- type_register_static(&xics_spapr_kvm_info);
type_register_static(&ics_kvm_info);
type_register_static(&icp_kvm_info);
}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e3f1c5..84d24b2 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong cppr = args[0];
icp_set_cppr(icp, cppr);
@@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
target_ulong server = xics_get_cpu_index_by_dt_id(args[0]);
target_ulong mfrr = args[1];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server);
- if (server >= spapr->xics->nr_servers) {
+ if (!icp) {
return H_PARAMETER;
}
- icp_set_mfrr(spapr->xics->ss + server, mfrr);
+ icp_set_mfrr(icp, mfrr);
return H_SUCCESS;
}
@@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong xirr = args[0];
icp_eoi(icp, xirr);
@@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t mfrr;
uint32_t xirr = icp_ipoll(icp, &mfrr);
@@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno, server, priority;
if ((nargs != 3) || (nret != 1)) {
@@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1));
priority = rtas_ld(args, 2);
- if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
+ if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server)
|| (priority > 0xff)) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
@@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 3)) {
@@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
- Error **errp)
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
-
- /* This needs to be deprecated ... */
- xics->nr_irqs = nr_irqs;
- if (ics) {
- ics->nr_irqs = nr_irqs;
- }
-}
-
-static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- Error **errp)
-{
- xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
-}
-
-static void xics_spapr_realize(DeviceState *dev, Error **errp)
-{
- XICSState *xics = XICS_SPAPR(dev);
- ICSState *ics;
- Error *error = NULL;
- int i;
-
- if (!xics->nr_servers) {
- error_setg(errp, "Number of servers needs to be greater 0");
- return;
- }
-
/* Registration of global state belongs into realize */
spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
@@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
spapr_register_hypercall(H_XIRR_X, h_xirr_x);
spapr_register_hypercall(H_EOI, h_eoi);
spapr_register_hypercall(H_IPOLL, h_ipoll);
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- object_property_set_bool(OBJECT(ics), true, "realized", &error);
- if (error) {
- error_propagate(errp, error);
- return;
- }
- }
-
- for (i = 0; i < xics->nr_servers; i++) {
- object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
- &error);
- if (error) {
- error_propagate(errp, error);
- return;
- }
- }
-}
-
-static void xics_spapr_initfn(Object *obj)
-{
- XICSState *xics = XICS_SPAPR(obj);
- ICSState *ics;
-
- ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE));
- object_property_add_child(obj, "ics", OBJECT(ics), NULL);
- ics->xics = xics;
- QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_spapr_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
-
- dc->realize = xics_spapr_realize;
- xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
- xsc->set_nr_servers = xics_spapr_set_nr_servers;
+ return 0;
}
-static const TypeInfo xics_spapr_info = {
- .name = TYPE_XICS_SPAPR,
- .parent = TYPE_XICS_COMMON,
- .instance_size = sizeof(XICSState),
- .class_size = sizeof(XICSStateClass),
- .class_init = xics_spapr_class_init,
- .instance_init = xics_spapr_initfn,
-};
-
#define ICS_IRQ_FREE(ics, srcno) \
(!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
@@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
return -1;
}
-int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
int irq;
if (!ics) {
@@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
* Allocate block of consecutive IRQs, and return the number of the first IRQ in
* the block. If align==true, aligns the first IRQ number to num.
*/
-int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align,
- Error **errp)
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
+ bool align, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
int i, first = -1;
if (!ics) {
@@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num)
}
}
-void xics_spapr_free(XICSState *xics, int irq, int num)
+void spapr_ics_free(ICSState *ics, int irq, int num)
{
- ICSState *ics = xics_find_source(xics, irq);
-
- if (ics) {
+ if (ics_valid_irq(ics, irq)) {
trace_xics_ics_free(0, irq, num);
ics_free(ics, irq - ics->offset, num);
}
}
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
{
uint32_t interrupt_server_ranges_prop[] = {
- 0, cpu_to_be32(xics->nr_servers),
+ 0, cpu_to_be32(nr_servers),
};
int node;
@@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
_FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
_FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
}
-
-static void xics_spapr_register_types(void)
-{
- type_register_static(&xics_spapr_info);
-}
-
-type_init(xics_spapr_register_types)
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 65ba188..aa5d2c1 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
{
sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
+ int ret;
if (nvram->blk) {
nvram->size = blk_getlength(nvram->blk);
+
+ ret = blk_set_perm(nvram->blk,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
} else {
nvram->size = DEFAULT_NVRAM_SIZE;
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a563555..273f1e4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0, NULL}
};
+static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+ void (*fn)(PCIBus *b,
+ PCIDevice *d,
+ void *opaque),
+ void *opaque)
+{
+ PCIDevice *d;
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
+ if (d) {
+ fn(bus, d, opaque);
+ }
+ }
+}
+
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+ void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+ void *opaque)
+{
+ bus = pci_find_bus_nr(bus, bus_num);
+
+ if (bus) {
+ pci_for_each_device_under_bus_reverse(bus, fn, opaque);
+ }
+}
+
static void pci_for_each_device_under_bus(PCIBus *bus,
void (*fn)(PCIBus *b, PCIDevice *d,
void *opaque),
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 87d8366..81c6c1c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -63,6 +63,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/nmi.h"
+#include "hw/intc/intc.h"
#include "hw/compat.h"
#include "qemu/cutils.h"
@@ -95,37 +96,68 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
-static XICSState *try_create_xics(const char *type, int nr_servers,
- int nr_irqs, Error **errp)
+static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
+ const char *type_icp, int nr_servers,
+ int nr_irqs, Error **errp)
{
- Error *err = NULL;
- DeviceState *dev;
+ XICSFabric *xi = XICS_FABRIC(spapr);
+ Error *err = NULL, *local_err = NULL;
+ ICSState *ics = NULL;
+ int i;
- dev = qdev_create(NULL, type);
- qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
- qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
- object_property_set_bool(OBJECT(dev), true, "realized", &err);
+ ics = ICS_SIMPLE(object_new(type_ics));
+ qdev_set_parent_bus(DEVICE(ics), sysbus_get_default());
+ object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
+ object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
+ object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
+ object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
+ error_propagate(&err, local_err);
if (err) {
- error_propagate(errp, err);
- object_unparent(OBJECT(dev));
- return NULL;
+ goto error;
}
- return XICS_COMMON(dev);
+
+ spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
+ spapr->nr_servers = nr_servers;
+
+ for (i = 0; i < nr_servers; i++) {
+ ICPState *icp = &spapr->icps[i];
+
+ object_initialize(icp, sizeof(*icp), type_icp);
+ qdev_set_parent_bus(DEVICE(icp), sysbus_get_default());
+ object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
+ object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
+ object_property_set_bool(OBJECT(icp), true, "realized", &err);
+ if (err) {
+ goto error;
+ }
+ object_unref(OBJECT(icp));
+ }
+
+ spapr->ics = ics;
+ return 0;
+
+error:
+ error_propagate(errp, err);
+ if (ics) {
+ object_unparent(OBJECT(ics));
+ }
+ return -1;
}
-static XICSState *xics_system_init(MachineState *machine,
- int nr_servers, int nr_irqs, Error **errp)
+static int xics_system_init(MachineState *machine,
+ int nr_servers, int nr_irqs, Error **errp)
{
- XICSState *xics = NULL;
+ int rc = -1;
if (kvm_enabled()) {
Error *err = NULL;
- if (machine_kernel_irqchip_allowed(machine)) {
- xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
- &err);
+ if (machine_kernel_irqchip_allowed(machine) &&
+ !xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
+ rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
+ TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
}
- if (machine_kernel_irqchip_required(machine) && !xics) {
+ if (machine_kernel_irqchip_required(machine) && rc < 0) {
error_reportf_err(err,
"kernel_irqchip requested but unavailable: ");
} else {
@@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine,
}
}
- if (!xics) {
- xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
+ if (rc < 0) {
+ xics_spapr_init(SPAPR_MACHINE(machine), errp);
+ rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
+ TYPE_ICP, nr_servers, nr_irqs, errp);
}
- return xics;
+ return rc;
}
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
- spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
+ spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1053,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr)
spapr->htab_fd = -1;
}
+static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+ return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
+}
+
+static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
+ hwaddr ptex, int n)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+ hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+
+ if (!spapr->htab) {
+ /*
+ * HTAB is controlled by KVM. Fetch into temporary buffer
+ */
+ ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
+ kvmppc_read_hptes(hptes, ptex, n);
+ return hptes;
+ }
+
+ /*
+ * HTAB is controlled by QEMU. Just point to the internally
+ * accessible PTEG.
+ */
+ return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
+}
+
+static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
+ const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+ if (!spapr->htab) {
+ g_free((void *)hptes);
+ }
+
+ /* Nothing to do for qemu managed HPT */
+}
+
+static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+ hwaddr offset = ptex * HASH_PTE_SIZE_64;
+
+ if (!spapr->htab) {
+ kvmppc_write_hpte(ptex, pte0, pte1);
+ } else {
+ stq_p(spapr->htab + offset, pte0);
+ stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+ }
+}
+
static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
int shift;
@@ -1252,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id)
sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
int err = 0;
+ if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+ int i;
+ for (i = 0; i < spapr->nr_servers; i++) {
+ icp_resend(&spapr->icps[i]);
+ }
+ }
+
/* In earlier versions, there was no separate qdev for the PAPR
* RTC, so the RTC offset was stored directly in sPAPREnvironment.
* So when migrating from those versions, poke the incoming offset
@@ -1902,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine)
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
/* Set up Interrupt Controller before we create the VCPUs */
- spapr->xics = xics_system_init(machine,
- DIV_ROUND_UP(max_cpus * smt, smp_threads),
- XICS_IRQS_SPAPR, &error_fatal);
+ xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
+ XICS_IRQS_SPAPR, &error_fatal);
/* Set up containers for ibm,client-set-architecture negotiated options */
spapr->ov5 = spapr_ovec_new();
@@ -2872,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
}
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+ return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
+}
+
+static void spapr_ics_resend(XICSFabric *dev)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+ ics_resend(spapr->ics);
+}
+
+static ICPState *spapr_icp_get(XICSFabric *xi, int server)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+
+ return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+}
+
+static void spapr_pic_print_info(InterruptStatsProvider *obj,
+ Monitor *mon)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+ int i;
+
+ for (i = 0; i < spapr->nr_servers; i++) {
+ icp_pic_print_info(&spapr->icps[i], mon);
+ }
+
+ ics_pic_print_info(spapr->ics, mon);
+}
+
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -2880,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
NMIClass *nc = NMI_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+ XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+ InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant)";
@@ -2891,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->init = ppc_spapr_init;
mc->reset = ppc_spapr_reset;
mc->block_default_type = IF_SCSI;
- mc->max_cpus = 255;
+ mc->max_cpus = 1024;
mc->no_parallel = 1;
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
@@ -2913,6 +3045,14 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
nc->nmi_monitor_handler = spapr_nmi;
smc->phb_placement = spapr_phb_placement;
vhc->hypercall = emulate_spapr_hypercall;
+ vhc->hpt_mask = spapr_hpt_mask;
+ vhc->map_hptes = spapr_map_hptes;
+ vhc->unmap_hptes = spapr_unmap_hptes;
+ vhc->store_hpte = spapr_store_hpte;
+ xic->ics_get = spapr_ics_get;
+ xic->ics_resend = spapr_ics_resend;
+ xic->icp_get = spapr_icp_get;
+ ispc->print_info = spapr_pic_print_info;
}
static const TypeInfo spapr_machine_info = {
@@ -2929,6 +3069,8 @@ static const TypeInfo spapr_machine_info = {
{ TYPE_NMI },
{ TYPE_HOTPLUG_HANDLER },
{ TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { TYPE_XICS_FABRIC },
+ { TYPE_INTERRUPT_STATS_PROVIDER },
{ }
},
};
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 55cd045..90d682f 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -13,10 +13,12 @@
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
#include "target/ppc/kvm_ppc.h"
#include "hw/ppc/ppc.h"
#include "target/ppc/mmu-hash64.h"
#include "sysemu/numa.h"
+#include "qemu/error-report.h"
static void spapr_cpu_reset(void *opaque)
{
@@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque)
env->spr[SPR_HIOR] = 0;
- ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
- &error_fatal);
+ /*
+ * This is a hack for the benefit of KVM PR - it abuses the SDR1
+ * slot in kvm_sregs to communicate the userspace address of the
+ * HPT
+ */
+ if (kvm_enabled()) {
+ env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab
+ | (spapr->htab_shift - 18);
+ if (kvmppc_put_books_sregs(cpu) < 0) {
+ error_report("Unable to update SDR1 in KVM");
+ exit(1);
+ }
+ }
}
static void spapr_cpu_destroy(PowerPCCPU *cpu)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- xics_cpu_destroy(spapr->xics, cpu);
+ xics_cpu_destroy(XICS_FABRIC(spapr), cpu);
qemu_unregister_reset(spapr_cpu_reset, cpu);
}
@@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
/* Enable PAPR mode in TCG or KVM */
- cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
- cpu_ppc_set_papr(cpu);
+ cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
if (cpu->max_compat) {
Error *local_err = NULL;
@@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cs->numa_node = i;
}
- xics_cpu_setup(spapr->xics, cpu);
+ xics_cpu_setup(XICS_FABRIC(spapr), cpu);
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f85a9c3..24a5758 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
- qemu_irq_pulse(xics_get_qirq(spapr->xics,
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_EPOW)));
}
@@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
- qemu_irq_pulse(xics_get_qirq(spapr->xics,
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_HOTPLUG)));
}
@@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
spapr_event_sources_get_source(spapr->event_sources, i);
g_assert(source->enabled);
- qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq));
}
}
@@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
spapr->event_sources = spapr_event_sources_new();
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
- xics_spapr_alloc(spapr->xics, 0, false,
+ spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
/* NOTE: if machine supports modern/dedicated hotplug event source,
@@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
*/
if (spapr->use_hotplug_event_source) {
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
- xics_spapr_alloc(spapr->xics, 0, false,
+ spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 42d20e0..f05a90e 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr)
return cpu->env.spr_cb[spr].name != NULL;
}
-static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
{
/*
- * hash value/pteg group index is normalized by htab_mask
+ * hash value/pteg group index is normalized by HPT mask
*/
- if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) {
+ if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
return false;
}
return true;
@@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
- CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong pteh = args[2];
target_ulong ptel = args[3];
unsigned apshift;
target_ulong raddr;
- target_ulong index;
- uint64_t token;
+ target_ulong slot;
+ const ppc_hash_pte64_t *hptes;
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
if (!apshift) {
@@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
pteh &= ~0x60ULL;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
- index = 0;
+ slot = ptex & 7ULL;
+ ptex = ptex & ~7ULL;
+
if (likely((flags & H_EXACT) == 0)) {
- pte_index &= ~7ULL;
- token = ppc_hash64_start_access(cpu, pte_index);
- for (; index < 8; index++) {
- if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) {
+ hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+ for (slot = 0; slot < 8; slot++) {
+ if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
break;
}
}
- ppc_hash64_stop_access(cpu, token);
- if (index == 8) {
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+ if (slot == 8) {
return H_PTEG_FULL;
}
} else {
- token = ppc_hash64_start_access(cpu, pte_index);
- if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+ if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
return H_PTEG_FULL;
}
- ppc_hash64_stop_access(cpu, token);
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
}
- ppc_hash64_store_hpte(cpu, pte_index + index,
- pteh | HPTE64_V_HPTE_DIRTY, ptel);
+ ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
- args[0] = pte_index + index;
+ args[0] = ptex + slot;
return H_SUCCESS;
}
@@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
target_ulong flags,
target_ulong *vp, target_ulong *rp)
{
- CPUPPCState *env = &cpu->env;
- uint64_t token;
+ const ppc_hash_pte64_t *hptes;
target_ulong v, r;
- if (!valid_pte_index(env, ptex)) {
+ if (!valid_ptex(cpu, ptex)) {
return REMOVE_PARM;
}
- token = ppc_hash64_start_access(cpu, ptex);
- v = ppc_hash64_load_hpte0(cpu, token, 0);
- r = ppc_hash64_load_hpte1(cpu, token, 0);
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+ v = ppc_hash64_hpte0(cpu, hptes, 0);
+ r = ppc_hash64_hpte1(cpu, hptes, 0);
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong avpn = args[2];
RemoveResult ret;
- ret = remove_hpte(cpu, pte_index, avpn, flags,
+ ret = remove_hpte(cpu, ptex, avpn, flags,
&args[0], &args[1]);
switch (ret) {
@@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong avpn = args[2];
- uint64_t token;
+ const ppc_hash_pte64_t *hptes;
target_ulong v, r;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
- token = ppc_hash64_start_access(cpu, pte_index);
- v = ppc_hash64_load_hpte0(cpu, token, 0);
- r = ppc_hash64_load_hpte1(cpu, token, 0);
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+ v = ppc_hash64_hpte0(cpu, hptes, 0);
+ r = ppc_hash64_hpte1(cpu, hptes, 0);
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
r |= (flags << 55) & HPTE64_R_PP0;
r |= (flags << 48) & HPTE64_R_KEY_HI;
r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
- ppc_hash64_store_hpte(cpu, pte_index,
+ ppc_hash64_store_hpte(cpu, ptex,
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
- ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+ ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
/* Flush the tlb */
check_tlb_flush(env, true);
/* Don't need a memory barrier, due to qemu's global lock */
- ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
+ ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
return H_SUCCESS;
}
static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
- CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
uint8_t *hpte;
int i, ridx, n_entries = 1;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
if (flags & H_READ_4) {
/* Clear the two low order bits */
- pte_index &= ~(3ULL);
+ ptex &= ~(3ULL);
n_entries = 4;
}
- hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
+ hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
for (i = 0, ridx = 0; i < n_entries; i++) {
args[ridx++] = ldq_p(hpte);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fd6fc1d..2a3499e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -43,6 +43,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
@@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return;
}
- xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+ spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
if (msi_present(pdev)) {
spapr_msi_setmsg(pdev, 0, false, 0, 0);
}
@@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
}
/* Allocate MSIs */
- irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
+ irq = spapr_ics_alloc_block(spapr->ics, req_num, false,
ret_intr_type == RTAS_TYPE_MSI, &err);
if (err) {
error_reportf_err(err, "Can't allocate MSIs for device %x: ",
@@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
/* Release previous MSIs */
if (msi) {
- xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+ spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
g_hash_table_remove(phb->msi, &config_addr);
}
@@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
trace_spapr_pci_msi_write(addr, data, irq);
- qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq));
}
static const MemoryRegionOps spapr_msi_ops = {
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+struct PCIIFace {
+ int iface;
+ const char *name;
+};
+
+struct PCISubClass {
+ int subclass;
+ const char *name;
+ const PCIIFace *iface;
+};
+
+struct PCIClass {
+ const char *name;
+ const PCISubClass *subc;
+};
+
+static const PCISubClass undef_subclass[] = {
+ { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+ { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+ { PCI_CLASS_STORAGE_IDE, "ide", NULL },
+ { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+ { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+ { PCI_CLASS_STORAGE_RAID, "raid", NULL },
+ { PCI_CLASS_STORAGE_ATA, "ata", NULL },
+ { PCI_CLASS_STORAGE_SATA, "sata", NULL },
+ { PCI_CLASS_STORAGE_SAS, "sas", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+ { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+ { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+ { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+ { PCI_CLASS_NETWORK_ATM, "atm", NULL },
+ { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+ { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+ { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+ { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+ { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+ { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+ { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+ { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+ { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+ { PCI_CLASS_MEMORY_RAM, "memory", NULL },
+ { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+ { PCI_CLASS_BRIDGE_HOST, "host", NULL },
+ { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+ { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+ { PCI_CLASS_BRIDGE_MC, "mca", NULL },
+ { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+ { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+ { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+ { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+ { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+ { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
+ { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass comm_subclass[] = {
+ { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
+ { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
+ { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
+ { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
+ { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
+ { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
+ { 0xFF, NULL, NULL, },
+};
+
+static const PCIIFace pic_iface[] = {
+ { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
+ { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass sys_subclass[] = {
+ { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
+ { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
+ { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
+ { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
+ { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
+ { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass inp_subclass[] = {
+ { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
+ { PCI_CLASS_INPUT_PEN, "pen", NULL },
+ { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
+ { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
+ { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass dock_subclass[] = {
+ { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass cpu_subclass[] = {
+ { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
+ { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
+ { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
+ { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCIIFace usb_iface[] = {
+ { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
+ { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
+ { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
+ { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
+ { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
+ { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass ser_subclass[] = {
+ { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
+ { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
+ { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
+ { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
+ { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
+ { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
+ { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
+ { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
+ { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
+ { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass wrl_subclass[] = {
+ { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
+ { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
+ { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
+ { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
+ { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass sat_subclass[] = {
+ { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
+ { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
+ { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
+ { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass crypt_subclass[] = {
+ { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
+ { PCI_CLASS_CRYPT_ENTERTAINMENT,
+ "entertainment-encryption", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass spc_subclass[] = {
+ { PCI_CLASS_SP_DPIO, "dpio", NULL },
+ { PCI_CLASS_SP_PERF, "counter", NULL },
+ { PCI_CLASS_SP_SYNCH, "measurement", NULL },
+ { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCIClass pci_classes[] = {
+ { "legacy-device", undef_subclass },
+ { "mass-storage", mass_subclass },
+ { "network", net_subclass },
+ { "display", displ_subclass, },
+ { "multimedia-device", media_subclass },
+ { "memory-controller", mem_subclass },
+ { "unknown-bridge", bridg_subclass },
+ { "communication-controller", comm_subclass},
+ { "system-peripheral", sys_subclass },
+ { "input-controller", inp_subclass },
+ { "docking-station", dock_subclass },
+ { "cpu", cpu_subclass },
+ { "serial-bus", ser_subclass },
+ { "wireless-controller", wrl_subclass },
+ { "intelligent-io", NULL },
+ { "satellite-device", sat_subclass },
+ { "encryption", crypt_subclass },
+ { "data-processing-controller", spc_subclass },
+};
+
+static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
+ uint8_t iface)
+{
+ const PCIClass *pclass;
+ const PCISubClass *psubclass;
+ const PCIIFace *piface;
+ const char *name;
+
+ if (class >= ARRAY_SIZE(pci_classes)) {
+ return "pci";
+ }
+
+ pclass = pci_classes + class;
+ name = pclass->name;
+
+ if (pclass->subc == NULL) {
+ return name;
+ }
+
+ psubclass = pclass->subc;
+ while ((psubclass->subclass & 0xff) != 0xff) {
+ if ((psubclass->subclass & 0xff) == subclass) {
+ name = psubclass->name;
+ break;
+ }
+ psubclass++;
+ }
+
+ piface = psubclass->iface;
+ if (piface == NULL) {
+ return name;
+ }
+ while ((piface->iface & 0xff) != 0xff) {
+ if ((piface->iface & 0xff) == iface) {
+ name = piface->name;
+ break;
+ }
+ piface++;
+ }
+
+ return name;
+}
+
+static void pci_get_node_name(char *nodename, int len, PCIDevice *dev)
+{
+ int slot = PCI_SLOT(dev->devfn);
+ int func = PCI_FUNC(dev->devfn);
+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+ const char *name;
+
+ name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
+ ccode & 0xff);
+
+ if (func != 0) {
+ snprintf(nodename, len, "%s@%x,%x", name, slot, func);
+ } else {
+ snprintf(nodename, len, "%s@%x", name, slot);
+ }
+}
+
static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
PCIDevice *pdev);
@@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
int pci_status, err;
char *buf = NULL;
uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
uint32_t max_msi, max_msix;
if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
@@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
_FDT(fdt_setprop_cell(fdt, offset, "revision-id",
pci_default_read_config(dev, PCI_REVISION_ID, 1)));
- _FDT(fdt_setprop_cell(fdt, offset, "class-code",
- pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
+ _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
_FDT(fdt_setprop_cell(fdt, offset, "interrupts",
pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
_FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
}
- /* NOTE: this is normally generated by firmware via path/unit name,
- * but in our case we must set it manually since it does not get
- * processed by OF beforehand
- */
- _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
+ _FDT(fdt_setprop_string(fdt, offset, "name",
+ pci_find_device_name((ccode >> 16) & 0xff,
+ (ccode >> 8) & 0xff,
+ ccode & 0xff)));
buf = spapr_phb_get_loc_code(sphb, dev);
if (!buf) {
error_report("Failed setting the ibm,loc-code");
@@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
void *fdt, int node_offset)
{
int offset, ret;
- int slot = PCI_SLOT(dev->devfn);
- int func = PCI_FUNC(dev->devfn);
char nodename[FDT_NAME_MAX];
- if (func != 0) {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
- } else {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
- }
+ pci_get_node_name(nodename, FDT_NAME_MAX, dev);
offset = fdt_add_subnode(fdt, node_offset, nodename);
ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
@@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
uint32_t irq;
Error *local_err = NULL;
- irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
+ irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "can't allocate LSIs: ");
@@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
s_fdt.fdt = p->fdt;
s_fdt.node_off = offset;
s_fdt.sphb = p->sphb;
- pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
}
static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
@@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
s_fdt.fdt = fdt;
s_fdt.node_off = bus_off;
s_fdt.sphb = phb;
- pci_for_each_device(bus, pci_bus_num(bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(bus, pci_bus_num(bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8bfc5f9..a0ee4fd 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
dev->qdev.id = id;
}
- dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err);
+ dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 2e2664f..7978c7d 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -20,6 +20,7 @@
#include "hw/s390x/virtio-ccw.h"
#include "hw/s390x/css.h"
#include "ipl.h"
+#include "qemu/error-report.h"
#define KERN_IMAGE_START 0x010000UL
#define KERN_PARM_AREA 0x010480UL
@@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = {
DEFINE_PROP_STRING("initrd", S390IPLState, initrd),
DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline),
DEFINE_PROP_STRING("firmware", S390IPLState, firmware),
+ DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw),
DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false),
DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration,
true),
@@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
TYPE_VIRTIO_CCW_DEVICE);
SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
TYPE_SCSI_DEVICE);
+ VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st),
+ TYPE_VIRTIO_NET);
+
+ if (vn) {
+ ipl->netboot = true;
+ }
if (virtio_ccw_dev) {
CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev);
@@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
return false;
}
+static int load_netboot_image(Error **errp)
+{
+ S390IPLState *ipl = get_ipl_device();
+ char *netboot_filename;
+ MemoryRegion *sysmem = get_system_memory();
+ MemoryRegion *mr = NULL;
+ void *ram_ptr = NULL;
+ int img_size = -1;
+
+ mr = memory_region_find(sysmem, 0, 1).mr;
+ if (!mr) {
+ error_setg(errp, "Failed to find memory region at address 0");
+ return -1;
+ }
+
+ ram_ptr = memory_region_get_ram_ptr(mr);
+ if (!ram_ptr) {
+ error_setg(errp, "No RAM found");
+ goto unref_mr;
+ }
+
+ netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw);
+ if (netboot_filename == NULL) {
+ error_setg(errp, "Could not find network bootloader");
+ goto unref_mr;
+ }
+
+ img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr,
+ NULL, NULL, 1, EM_S390, 0, 0, NULL, false);
+
+ if (img_size < 0) {
+ img_size = load_image_size(netboot_filename, ram_ptr, ram_size);
+ ipl->start_addr = KERN_IMAGE_START;
+ }
+
+ if (img_size < 0) {
+ error_setg(errp, "Failed to load network bootloader");
+ }
+
+ g_free(netboot_filename);
+
+unref_mr:
+ memory_region_unref(mr);
+ return img_size;
+}
+
+static bool is_virtio_net_device(IplParameterBlock *iplb)
+{
+ uint8_t cssid;
+ uint8_t ssid;
+ uint16_t devno;
+ uint16_t schid;
+ SubchDev *sch = NULL;
+
+ if (iplb->pbt != S390_IPL_TYPE_CCW) {
+ return false;
+ }
+
+ devno = be16_to_cpu(iplb->ccw.devno);
+ ssid = iplb->ccw.ssid & 3;
+
+ for (schid = 0; schid < MAX_SCHID; schid++) {
+ for (cssid = 0; cssid < MAX_CSSID; cssid++) {
+ sch = css_find_subch(1, cssid, ssid, schid);
+
+ if (sch && sch->devno == devno) {
+ return sch->id.cu_model == VIRTIO_ID_NET;
+ }
+ }
+ }
+ return false;
+}
+
void s390_ipl_update_diag308(IplParameterBlock *iplb)
{
S390IPLState *ipl = get_ipl_device();
ipl->iplb = *iplb;
ipl->iplb_valid = true;
+ ipl->netboot = is_virtio_net_device(iplb);
}
IplParameterBlock *s390_ipl_get_iplb(void)
@@ -287,6 +369,7 @@ void s390_reipl_request(void)
void s390_ipl_prepare_cpu(S390CPU *cpu)
{
S390IPLState *ipl = get_ipl_device();
+ Error *err = NULL;
cpu->env.psw.addr = ipl->start_addr;
cpu->env.psw.mask = IPL_PSW_MASK;
@@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu)
ipl->iplb_valid = s390_gen_initial_iplb(ipl);
}
}
+ if (ipl->netboot) {
+ if (load_netboot_image(&err) < 0) {
+ error_report_err(err);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
+ ipl->iplb.ccw.netboot_start_addr = ipl->start_addr;
+ }
}
static void s390_ipl_reset(DeviceState *dev)
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index c891095..46930e4 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -16,7 +16,8 @@
#include "cpu.h"
struct IplBlockCcw {
- uint8_t reserved0[85];
+ uint64_t netboot_start_addr;
+ uint8_t reserved0[77];
uint8_t ssid;
uint16_t devno;
uint8_t vm_flags;
@@ -100,12 +101,14 @@ struct S390IPLState {
IplParameterBlock iplb;
bool iplb_valid;
bool reipl_requested;
+ bool netboot;
/*< public >*/
char *kernel;
char *initrd;
char *cmdline;
char *firmware;
+ char *netboot_fw;
uint8_t cssid;
uint8_t ssid;
uint16_t devno;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4f0d62b..40914fd 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine)
/* get a BUS */
css_bus = virtual_css_bus_init();
s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
- machine->initrd_filename, "s390-ccw.img", true);
+ machine->initrd_filename, "s390-ccw.img",
+ "s390-netboot.img", true);
s390_flic_init();
dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index 9cfb090..afa4148 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
const char *firmware,
+ const char *netboot_fw,
bool enforce_bios)
{
Object *new = object_new(TYPE_S390_IPL);
@@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
}
qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
qdev_prop_set_string(dev, "firmware", firmware);
+ qdev_prop_set_string(dev, "netboot_fw", netboot_fw);
qdev_prop_set_bit(dev, "enforce_bios", enforce_bios);
object_property_add_child(qdev_get_machine(), TYPE_S390_IPL,
new, NULL);
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
index f588b80..f2377a3 100644
--- a/hw/s390x/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
const char *firmware,
+ const char *netboot_fw,
bool enforce_bios);
void s390_create_virtio_net(BusState *bus, const char *name);
void s390_nmi(NMIState *n, int cpu_index, Error **errp);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bbfb5dc..a53f058 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque)
}
}
-static void scsi_cd_change_media_cb(void *opaque, bool load)
+static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
{
SCSIDiskState *s = opaque;
@@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
return;
}
}
- blkconf_apply_backend_options(&dev->conf);
+ blkconf_apply_backend_options(&dev->conf,
+ blk_is_read_only(s->qdev.conf.blk),
+ dev->type == TYPE_DISK, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
if (s->qdev.conf.discard_granularity == -1) {
s->qdev.conf.discard_granularity =
@@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
if (!dev->conf.blk) {
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
s->qdev.blocksize = 2048;
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 14c2bdf..295dc44 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly)
}
}
+void sdbus_reparent_card(SDBus *from, SDBus *to)
+{
+ SDState *card = get_card(from);
+ SDCardClass *sc;
+ bool readonly;
+
+ /* We directly reparent the card object rather than implementing this
+ * as a hotpluggable connection because we don't want to expose SD cards
+ * to users as being hotpluggable, and we can get away with it in this
+ * limited use case. This could perhaps be implemented more cleanly in
+ * future by adding support to the hotplug infrastructure for "device
+ * can be hotplugged only via code, not by user".
+ */
+
+ if (!card) {
+ return;
+ }
+
+ sc = SD_CARD_GET_CLASS(card);
+ readonly = sc->get_readonly(card);
+
+ sdbus_set_inserted(from, false);
+ qdev_set_parent_bus(DEVICE(card), &to->qbus);
+ sdbus_set_inserted(to, true);
+ sdbus_set_readonly(to, readonly);
+}
+
static const TypeInfo sd_bus_info = {
.name = TYPE_SD_BUS,
.parent = TYPE_BUS,
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 8e88e83..ba47bff 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd)
return sd->wp_switch;
}
-static void sd_cardchange(void *opaque, bool load)
+static void sd_cardchange(void *opaque, bool load, Error **errp)
{
SDState *sd = opaque;
DeviceState *dev = DEVICE(sd);
@@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj)
static void sd_realize(DeviceState *dev, Error **errp)
{
SDState *sd = SD_CARD(dev);
+ int ret;
if (sd->blk && blk_is_read_only(sd->blk)) {
error_setg(errp, "Cannot use read-only drive as SD card");
@@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp)
}
if (sd->blk) {
+ ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
}
}
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index fc99668..dd6f27e 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -1,5 +1,6 @@
common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o
+common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o
common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o
common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
common-obj-$(CONFIG_DS1338) += ds1338.o
diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c
new file mode 100644
index 0000000..df8d280
--- /dev/null
+++ b/hw/timer/armv7m_systick.c
@@ -0,0 +1,240 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#include "qemu/osdep.h"
+#include "hw/timer/armv7m_systick.h"
+#include "qemu-common.h"
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "trace.h"
+
+/* qemu timers run at 1GHz. We want something closer to 1MHz. */
+#define SYSTICK_SCALE 1000ULL
+
+#define SYSTICK_ENABLE (1 << 0)
+#define SYSTICK_TICKINT (1 << 1)
+#define SYSTICK_CLKSOURCE (1 << 2)
+#define SYSTICK_COUNTFLAG (1 << 16)
+
+int system_clock_scale;
+
+/* Conversion factor from qemu timer to SysTick frequencies. */
+static inline int64_t systick_scale(SysTickState *s)
+{
+ if (s->control & SYSTICK_CLKSOURCE) {
+ return system_clock_scale;
+ } else {
+ return 1000;
+ }
+}
+
+static void systick_reload(SysTickState *s, int reset)
+{
+ /* The Cortex-M3 Devices Generic User Guide says that "When the
+ * ENABLE bit is set to 1, the counter loads the RELOAD value from the
+ * SYST RVR register and then counts down". So, we need to check the
+ * ENABLE bit before reloading the value.
+ */
+ trace_systick_reload();
+
+ if ((s->control & SYSTICK_ENABLE) == 0) {
+ return;
+ }
+
+ if (reset) {
+ s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ }
+ s->tick += (s->reload + 1) * systick_scale(s);
+ timer_mod(s->timer, s->tick);
+}
+
+static void systick_timer_tick(void *opaque)
+{
+ SysTickState *s = (SysTickState *)opaque;
+
+ trace_systick_timer_tick();
+
+ s->control |= SYSTICK_COUNTFLAG;
+ if (s->control & SYSTICK_TICKINT) {
+ /* Tell the NVIC to pend the SysTick exception */
+ qemu_irq_pulse(s->irq);
+ }
+ if (s->reload == 0) {
+ s->control &= ~SYSTICK_ENABLE;
+ } else {
+ systick_reload(s, 0);
+ }
+}
+
+static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size)
+{
+ SysTickState *s = opaque;
+ uint32_t val;
+
+ switch (addr) {
+ case 0x0: /* SysTick Control and Status. */
+ val = s->control;
+ s->control &= ~SYSTICK_COUNTFLAG;
+ break;
+ case 0x4: /* SysTick Reload Value. */
+ val = s->reload;
+ break;
+ case 0x8: /* SysTick Current Value. */
+ {
+ int64_t t;
+
+ if ((s->control & SYSTICK_ENABLE) == 0) {
+ val = 0;
+ break;
+ }
+ t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ if (t >= s->tick) {
+ val = 0;
+ break;
+ }
+ val = ((s->tick - (t + 1)) / systick_scale(s)) + 1;
+ /* The interrupt in triggered when the timer reaches zero.
+ However the counter is not reloaded until the next clock
+ tick. This is a hack to return zero during the first tick. */
+ if (val > s->reload) {
+ val = 0;
+ }
+ break;
+ }
+ case 0xc: /* SysTick Calibration Value. */
+ val = 10000;
+ break;
+ default:
+ val = 0;
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr);
+ break;
+ }
+
+ trace_systick_read(addr, val, size);
+ return val;
+}
+
+static void systick_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ SysTickState *s = opaque;
+
+ trace_systick_write(addr, value, size);
+
+ switch (addr) {
+ case 0x0: /* SysTick Control and Status. */
+ {
+ uint32_t oldval = s->control;
+
+ s->control &= 0xfffffff8;
+ s->control |= value & 7;
+ if ((oldval ^ value) & SYSTICK_ENABLE) {
+ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ if (value & SYSTICK_ENABLE) {
+ if (s->tick) {
+ s->tick += now;
+ timer_mod(s->timer, s->tick);
+ } else {
+ systick_reload(s, 1);
+ }
+ } else {
+ timer_del(s->timer);
+ s->tick -= now;
+ if (s->tick < 0) {
+ s->tick = 0;
+ }
+ }
+ } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
+ /* This is a hack. Force the timer to be reloaded
+ when the reference clock is changed. */
+ systick_reload(s, 1);
+ }
+ break;
+ }
+ case 0x4: /* SysTick Reload Value. */
+ s->reload = value;
+ break;
+ case 0x8: /* SysTick Current Value. Writes reload the timer. */
+ systick_reload(s, 1);
+ s->control &= ~SYSTICK_COUNTFLAG;
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr);
+ }
+}
+
+static const MemoryRegionOps systick_ops = {
+ .read = systick_read,
+ .write = systick_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static void systick_reset(DeviceState *dev)
+{
+ SysTickState *s = SYSTICK(dev);
+
+ s->control = 0;
+ s->reload = 0;
+ s->tick = 0;
+ timer_del(s->timer);
+}
+
+static void systick_instance_init(Object *obj)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+ SysTickState *s = SYSTICK(obj);
+
+ memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0);
+ sysbus_init_mmio(sbd, &s->iomem);
+ sysbus_init_irq(sbd, &s->irq);
+ s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+}
+
+static const VMStateDescription vmstate_systick = {
+ .name = "armv7m_systick",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(control, SysTickState),
+ VMSTATE_UINT32(reload, SysTickState),
+ VMSTATE_INT64(tick, SysTickState),
+ VMSTATE_TIMER_PTR(timer, SysTickState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void systick_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->vmsd = &vmstate_systick;
+ dc->reset = systick_reset;
+}
+
+static const TypeInfo armv7m_systick_info = {
+ .name = TYPE_SYSTICK,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_init = systick_instance_init,
+ .instance_size = sizeof(SysTickState),
+ .class_init = systick_class_init,
+};
+
+static void armv7m_systick_register_types(void)
+{
+ type_register_static(&armv7m_systick_info);
+}
+
+type_init(armv7m_systick_register_types)
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index 3495c41..d17cfe6 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d"
aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32
aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32
aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64
+
+# hw/timer/armv7m_systick.c
+systick_reload(void) "systick reload"
+systick_timer_tick(void) "systick reload"
+systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index efe4b8e..24f1608 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -8,7 +8,6 @@
#include "monitor/monitor.h"
#include "trace.h"
#include "qemu/cutils.h"
-#include "migration/migration.h"
static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
@@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline)
const char *params;
int len;
USBDevice *dev;
- ObjectClass *klass;
- DeviceClass *dc;
params = strchr(cmdline,':');
if (params) {
@@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline)
return NULL;
}
- klass = object_class_by_name(f->name);
- if (klass == NULL) {
- error_report("Device '%s' not found", f->name);
- return NULL;
- }
-
- dc = DEVICE_CLASS(klass);
-
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_report("Device %s is not migratable, but --only-migratable "
- "was specified", f->name);
- return NULL;
- }
- }
-
if (f->usbdevice_init) {
dev = f->usbdevice_init(bus, params);
} else {
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index c607f76..8a61ec9 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = {
.load_request = usb_msd_load_request,
};
+static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
+{
+ MSDState *s = USB_STORAGE_DEV(dev);
+
+ object_unref(OBJECT(&s->bus));
+}
+
static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
{
MSDState *s = USB_STORAGE_DEV(dev);
@@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
blkconf_serial(&s->conf, &dev->serial);
blkconf_blocksizes(&s->conf);
- blkconf_apply_backend_options(&s->conf);
+ blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
/*
* Hack alert: this pretends to be a block device, but it's really
@@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
s->scsi_dev = scsi_dev;
}
+static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
+{
+ MSDState *s = USB_STORAGE_DEV(dev);
+
+ object_unref(OBJECT(&s->bus));
+}
+
static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
{
MSDState *s = USB_STORAGE_DEV(dev);
@@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
uc->realize = usb_msd_realize_storage;
+ uc->unrealize = usb_msd_unrealize_storage;
dc->props = msd_properties;
}
@@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
uc->realize = usb_msd_realize_bot;
+ uc->unrealize = usb_msd_unrealize_bot;
uc->attached_settable = true;
}
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 3b26655..fffc424 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -896,6 +896,8 @@ static void usb_uas_unrealize(USBDevice *dev, Error **errp)
UASDevice *uas = USB_UAS(dev);
qemu_bh_delete(uas->status_bh);
+
+ object_unref(OBJECT(&uas->bus));
}
static void usb_uas_realize(USBDevice *dev, Error **errp)
diff --git a/include/block/block.h b/include/block/block.h
index bde5ebd..c7c4a3a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -82,6 +82,7 @@ typedef struct HDGeometry {
} HDGeometry;
#define BDRV_O_RDWR 0x0002
+#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
@@ -187,6 +188,42 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_MAX,
} BlockOpType;
+/* Block node permission constants */
+enum {
+ /**
+ * A user that has the "permission" of consistent reads is guaranteed that
+ * their view of the contents of the block device is complete and
+ * self-consistent, representing the contents of a disk at a specific
+ * point.
+ *
+ * For most block devices (including their backing files) this is true, but
+ * the property cannot be maintained in a few situations like for
+ * intermediate nodes of a commit block job.
+ */
+ BLK_PERM_CONSISTENT_READ = 0x01,
+
+ /** This permission is required to change the visible disk contents. */
+ BLK_PERM_WRITE = 0x02,
+
+ /**
+ * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+ * required for writes to the block node when the caller promises that
+ * the visible disk content doesn't change.
+ */
+ BLK_PERM_WRITE_UNCHANGED = 0x04,
+
+ /** This permission is required to change the size of a block node. */
+ BLK_PERM_RESIZE = 0x08,
+
+ /**
+ * This permission is required to change the node that this BdrvChild
+ * points to.
+ */
+ BLK_PERM_GRAPH_MOD = 0x10,
+
+ BLK_PERM_ALL = 0x1f,
+};
+
/* disk I/O throttling */
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
@@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp);
void bdrv_replace_in_backing_chain(BlockDriverState *old,
BlockDriverState *new);
@@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp);
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
@@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role);
+ const BdrvChildRole *child_role,
+ Error **errp);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1670941..a57c0bf 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -320,6 +320,59 @@ struct BlockDriver {
void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
Error **errp);
+ /**
+ * Informs the block driver that a permission change is intended. The
+ * driver checks whether the change is permissible and may take other
+ * preparations for the change (e.g. get file system locks). This operation
+ * is always followed either by a call to either .bdrv_set_perm or
+ * .bdrv_abort_perm_update.
+ *
+ * Checks whether the requested set of cumulative permissions in @perm
+ * can be granted for accessing @bs and whether no other users are using
+ * permissions other than those given in @shared (both arguments take
+ * BLK_PERM_* bitmasks).
+ *
+ * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+ * and errp is set to an error describing the conflict.
+ */
+ int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared, Error **errp);
+
+ /**
+ * Called to inform the driver that the set of cumulative set of used
+ * permissions for @bs has changed to @perm, and the set of sharable
+ * permission to @shared. The driver can use this to propagate changes to
+ * its children (i.e. request permissions only if a parent actually needs
+ * them).
+ *
+ * This function is only invoked after bdrv_check_perm(), so block drivers
+ * may rely on preparations made in their .bdrv_check_perm implementation.
+ */
+ void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+ /*
+ * Called to inform the driver that after a previous bdrv_check_perm()
+ * call, the permission update is not performed and any preparations made
+ * for it (e.g. taken file locks) need to be undone.
+ *
+ * This function can be called even for nodes that never saw a
+ * bdrv_check_perm() call. It is a no-op then.
+ */
+ void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+ /**
+ * Returns in @nperm and @nshared the permissions that the driver for @bs
+ * needs on its child @c, based on the cumulative permissions requested by
+ * the parents in @parent_perm and @parent_shared.
+ *
+ * If @c is NULL, return the permissions for attaching a new child for the
+ * given @role.
+ */
+ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t parent_perm, uint64_t parent_shared,
+ uint64_t *nperm, uint64_t *nshared);
+
QLIST_ENTRY(BlockDriver) list;
};
@@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier {
} BdrvAioNotifier;
struct BdrvChildRole {
+ /* If true, bdrv_replace_in_backing_chain() doesn't change the node this
+ * BdrvChild points to. */
+ bool stay_at_node;
+
void (*inherit_options)(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options);
@@ -399,6 +456,12 @@ struct BdrvChildRole {
* name), or NULL if the parent can't provide a better name. */
const char* (*get_name)(BdrvChild *child);
+ /* Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory. */
+ char* (*get_parent_desc)(BdrvChild *child);
+
/*
* If this pair of functions is implemented, the parent doesn't issue new
* requests after returning from .drained_begin() until .drained_end() is
@@ -409,16 +472,32 @@ struct BdrvChildRole {
*/
void (*drained_begin)(BdrvChild *child);
void (*drained_end)(BdrvChild *child);
+
+ void (*attach)(BdrvChild *child);
+ void (*detach)(BdrvChild *child);
};
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
+extern const BdrvChildRole child_backing;
struct BdrvChild {
BlockDriverState *bs;
char *name;
const BdrvChildRole *role;
void *opaque;
+
+ /**
+ * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+ */
+ uint64_t perm;
+
+ /**
+ * Permissions that can still be granted to other users of @bs while this
+ * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+ */
+ uint64_t shared_perm;
+
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
};
@@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
* @errp: Error object.
*
*/
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
- Error **errp);
+ const char *filter_node_name, Error **errp);
/**
* commit_active_start:
* @job_id: The id of the newly-created job, or %NULL to use the
@@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
* @errp: Error object.
@@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
- BlockCompletionFunc *cb,
- void *opaque, Error **errp, bool auto_complete);
+ const char *filter_node_name,
+ BlockCompletionFunc *cb, void *opaque, Error **errp,
+ bool auto_complete);
/*
* mirror_start:
* @job_id: The id of the newly-created job, or %NULL to use the
@@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
* @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
* @errp: Error object.
*
* Start a mirroring operation on @bs. Clusters that are allocated
@@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, Error **errp);
+ bool unmap, const char *filter_node_name, Error **errp);
/*
* backup_job_create:
@@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
- void *opaque);
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
+void bdrv_child_abort_perm_update(BdrvChild *c);
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
+ * all children */
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * (non-raw) image formats: Like above for bs->backing, but for bs->file it
+ * requires WRITE | RESIZE for read-write images, always requires
+ * CONSISTENT_READ and doesn't share WRITE. */
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load);
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 1acb256..9e906f7 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id);
/**
* block_job_add_bdrv:
* @job: A block job
+ * @name: The name to assign to the new BdrvChild
* @bs: A BlockDriverState that is involved in @job
+ * @perm, @shared_perm: Permissions to request on the node
*
* Add @bs to the list of BlockDriverState that are involved in
* @job. This means that all operations will be blocked on @bs while
* @job exists.
*/
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs);
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+ uint64_t perm, uint64_t shared_perm, Error **errp);
+
+/**
+ * block_job_remove_all_bdrv:
+ * @job: The block job
+ *
+ * Remove all BlockDriverStates from the list of nodes that are involved in the
+ * job. This removes the blockers added with block_job_add_bdrv().
+ */
+void block_job_remove_all_bdrv(BlockJob *job);
/**
* block_job_set_speed:
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 8223822..3f86cc5 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -119,6 +119,7 @@ struct BlockJobDriver {
* generated automatically.
* @job_type: The class object for the newly-created job.
* @bs: The block
+ * @perm, @shared_perm: Permissions to request for @bs
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
@@ -134,7 +135,8 @@ struct BlockJobDriver {
* called from a wrapper that is specific to the job type.
*/
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
- BlockDriverState *bs, int64_t speed, int flags,
+ BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp);
/**
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index bd15853..8c305aa 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
+size_t qemu_ram_pagesize_largest(void);
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
int len, int is_write);
@@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
ram_addr_t offset, ram_addr_t length, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
#endif
diff --git a/include/glib-compat.h b/include/glib-compat.h
index 0cd24ff..863c8cf 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -328,4 +328,25 @@ static inline void g_source_set_name_by_id(guint tag, const char *name)
#define g_test_subprocess() (0)
#endif
+
+#if !GLIB_CHECK_VERSION(2, 34, 0)
+static inline void
+g_test_add_data_func_full(const char *path,
+ gpointer data,
+ gpointer fn,
+ gpointer data_free_func)
+{
+#if GLIB_CHECK_VERSION(2, 26, 0)
+ /* back-compat casts, remove this once we can require new-enough glib */
+ g_test_add_vtable(path, 0, data, NULL,
+ (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
+#else
+ /* back-compat casts, remove this once we can require new-enough glib */
+ g_test_add_vtable(path, 0, data, NULL,
+ (void (*)(void)) fn, (void (*)(void)) data_free_func);
+#endif
+}
+#endif
+
+
#endif
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index c175c0e..a3f79d3 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -26,6 +26,18 @@ typedef enum {
/* armv7m.c */
DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
const char *kernel_filename, const char *cpu_model);
+/**
+ * armv7m_load_kernel:
+ * @cpu: CPU
+ * @kernel_filename: file to load
+ * @mem_size: mem_size: maximum image size to load
+ *
+ * Load the guest image for an ARMv7M system. This must be called by
+ * any ARMv7M board, either directly or via armv7m_init(). (This is
+ * necessary to ensure that the CPU resets correctly on system reset,
+ * as well as for kernel loading.)
+ */
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);
/*
* struct used as a parameter of the arm_load_kernel machine init
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
new file mode 100644
index 0000000..a9b3f2a
--- /dev/null
+++ b/include/hw/arm/armv7m.h
@@ -0,0 +1,63 @@
+/*
+ * ARMv7M CPU object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_H
+#define HW_ARM_ARMV7M_H
+
+#include "hw/sysbus.h"
+#include "hw/arm/armv7m_nvic.h"
+
+#define TYPE_BITBAND "ARM,bitband-memory"
+#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
+
+typedef struct {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ AddressSpace *source_as;
+ MemoryRegion iomem;
+ uint32_t base;
+ MemoryRegion *source_memory;
+} BitBandState;
+
+#define TYPE_ARMV7M "armv7m"
+#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M)
+
+#define ARMV7M_NUM_BITBANDS 2
+
+/* ARMv7M container object.
+ * + Unnamed GPIO input lines: external IRQ lines for the NVIC
+ * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ
+ * + Property "cpu-model": CPU model to instantiate
+ * + Property "num-irq": number of external IRQ lines
+ * + Property "memory": MemoryRegion defining the physical address space
+ * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal
+ * devices will be automatically layered on top of this view.)
+ */
+typedef struct ARMv7MState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+ NVICState nvic;
+ BitBandState bitband[ARMV7M_NUM_BITBANDS];
+ ARMCPU *cpu;
+
+ /* MemoryRegion we pass to the CPU, with our devices layered on
+ * top of the ones the board provides in board_memory.
+ */
+ MemoryRegion container;
+
+ /* Properties */
+ char *cpu_model;
+ /* MemoryRegion the board provides to us (with its devices, RAM, etc) */
+ MemoryRegion *board_memory;
+} ARMv7MState;
+
+#endif
diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h
new file mode 100644
index 0000000..1d145fb
--- /dev/null
+++ b/include/hw/arm/armv7m_nvic.h
@@ -0,0 +1,62 @@
+/*
+ * ARMv7M NVIC object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_NVIC_H
+#define HW_ARM_ARMV7M_NVIC_H
+
+#include "target/arm/cpu.h"
+#include "hw/sysbus.h"
+#include "hw/timer/armv7m_systick.h"
+
+#define TYPE_NVIC "armv7m_nvic"
+
+#define NVIC(obj) \
+ OBJECT_CHECK(NVICState, (obj), TYPE_NVIC)
+
+/* Highest permitted number of exceptions (architectural limit) */
+#define NVIC_MAX_VECTORS 512
+
+typedef struct VecInfo {
+ /* Exception priorities can range from -3 to 255; only the unmodifiable
+ * priority values for RESET, NMI and HardFault can be negative.
+ */
+ int16_t prio;
+ uint8_t enabled;
+ uint8_t pending;
+ uint8_t active;
+ uint8_t level; /* exceptions <=15 never set level */
+} VecInfo;
+
+typedef struct NVICState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ ARMCPU *cpu;
+
+ VecInfo vectors[NVIC_MAX_VECTORS];
+ uint32_t prigroup;
+
+ /* vectpending and exception_prio are both cached state that can
+ * be recalculated from the vectors[] array and the prigroup field.
+ */
+ unsigned int vectpending; /* highest prio pending enabled exception */
+ int exception_prio; /* group prio of the highest prio active exception */
+
+ MemoryRegion sysregmem;
+ MemoryRegion container;
+
+ uint32_t num_irq;
+ qemu_irq excpout;
+ qemu_irq sysresetreq;
+
+ SysTickState systick;
+} NVICState;
+
+#endif
diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index 31241c7..122b286 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -22,6 +22,8 @@
#include "hw/misc/bcm2835_rng.h"
#include "hw/misc/bcm2835_mbox.h"
#include "hw/sd/sdhci.h"
+#include "hw/sd/bcm2835_sdhost.h"
+#include "hw/gpio/bcm2835_gpio.h"
#define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals"
#define BCM2835_PERIPHERALS(obj) \
@@ -45,6 +47,8 @@ typedef struct BCM2835PeripheralState {
BCM2835RngState rng;
BCM2835MboxState mboxes;
SDHCIState sdhci;
+ BCM2835SDHostState sdhost;
+ BCM2835GpioState gpio;
} BCM2835PeripheralState;
#endif /* BCM2835_PERIPHERALS_H */
diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h
index 1332141..e2dce11 100644
--- a/include/hw/arm/stm32f205_soc.h
+++ b/include/hw/arm/stm32f205_soc.h
@@ -31,6 +31,7 @@
#include "hw/adc/stm32f2xx_adc.h"
#include "hw/or-irq.h"
#include "hw/ssi/stm32f2xx_spi.h"
+#include "hw/arm/armv7m.h"
#define TYPE_STM32F205_SOC "stm32f205-soc"
#define STM32F205_SOC(obj) \
@@ -51,9 +52,10 @@ typedef struct STM32F205State {
SysBusDevice parent_obj;
/*< public >*/
- char *kernel_filename;
char *cpu_model;
+ ARMv7MState armv7m;
+
STM32F2XXSyscfgState syscfg;
STM32F2XXUsartState usart[STM_NUM_USARTS];
STM32F2XXTimerState timer[STM_NUM_TIMERS];
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index df9d207..f3f6e8e 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -26,6 +26,7 @@ typedef struct BlockConf {
/* geometry, not all devices use this */
uint32_t cyls, heads, secs;
OnOffAuto wce;
+ bool share_rw;
BlockdevOnError rerror;
BlockdevOnError werror;
} BlockConf;
@@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
DEFINE_PROP_UINT32("discard_granularity", _state, \
_conf.discard_granularity, -1), \
- DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO)
+ DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \
+ ON_OFF_AUTO_AUTO), \
+ DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \
DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \
@@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans,
unsigned cyls_max, unsigned heads_max, unsigned secs_max,
Error **errp);
void blkconf_blocksizes(BlockConf *conf);
-void blkconf_apply_backend_options(BlockConf *conf);
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+ bool resizable, Error **errp);
/* Hard disk geometry */
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 25659b9..a172a60 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
int must_swab, uint64_t *pentry,
uint64_t *lowaddr, uint64_t *highaddr,
int elf_machine, int clear_lsb, int data_swab,
- AddressSpace *as)
+ AddressSpace *as, bool load_rom)
{
struct elfhdr ehdr;
struct elf_phdr *phdr = NULL, *ph;
@@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd,
*pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
}
- snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
+ if (load_rom) {
+ snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
- /* rom_add_elf_program() seize the ownership of 'data' */
- rom_add_elf_program(label, data, file_size, mem_size, addr, as);
+ /* rom_add_elf_program() seize the ownership of 'data' */
+ rom_add_elf_program(label, data, file_size, mem_size, addr, as);
+ } else {
+ cpu_physical_memory_write(addr, data, file_size);
+ g_free(data);
+ }
total_size += mem_size;
if (addr < low)
diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h
new file mode 100644
index 0000000..9f8e0c7
--- /dev/null
+++ b/include/hw/gpio/bcm2835_gpio.h
@@ -0,0 +1,39 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ * Clement Deschamps <clement.deschamps@antfield.fr>
+ * Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_GPIO_H
+#define BCM2835_GPIO_H
+
+#include "hw/sd/sd.h"
+
+typedef struct BCM2835GpioState {
+ SysBusDevice parent_obj;
+
+ MemoryRegion iomem;
+
+ /* SDBus selector */
+ SDBus sdbus;
+ SDBus *sdbus_sdhci;
+ SDBus *sdbus_sdhost;
+
+ uint8_t fsel[54];
+ uint32_t lev0, lev1;
+ uint8_t sd_fsel;
+ qemu_irq out[54];
+} BCM2835GpioState;
+
+#define TYPE_BCM2835_GPIO "bcm2835_gpio"
+#define BCM2835_GPIO(obj) \
+ OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO)
+
+#endif
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index 4156051..bccdfe1 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -172,6 +172,7 @@ struct GICv3CPUState {
uint8_t gicr_ipriorityr[GIC_INTERNAL];
/* CPU interface */
+ uint64_t icc_sre_el1;
uint64_t icc_ctlr_el1[2];
uint64_t icc_pmr_el1;
uint64_t icc_bpr[3];
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 40c4153..490c9ff 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
#define ELF_LOAD_WRONG_ENDIAN -4
const char *load_elf_strerror(int error);
-/** load_elf_as:
+/** load_elf_ram:
* @filename: Path of ELF file
* @translate_fn: optional function to translate load addresses
* @translate_opaque: opaque data passed to @translate_fn
@@ -81,6 +81,7 @@ const char *load_elf_strerror(int error);
* words and 3 for within doublewords.
* @as: The AddressSpace to load the ELF to. The value of address_space_memory
* is used if nothing is supplied here.
+ * @load_rom : Load ELF binary as ROM
*
* Load an ELF file's contents to the emulated system's address space.
* Clients may optionally specify a callback to perform address
@@ -93,6 +94,16 @@ const char *load_elf_strerror(int error);
* If @elf_machine is EM_NONE then the machine type will be read from the
* ELF header and no checks will be carried out against the machine type.
*/
+int load_elf_ram(const char *filename,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+ uint64_t *highaddr, int big_endian, int elf_machine,
+ int clear_lsb, int data_swab, AddressSpace *as,
+ bool load_rom);
+
+/** load_elf_as:
+ * Same as load_elf_ram(), but always loads the elf as ROM
+ */
int load_elf_as(const char *filename,
uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 092294e..dfa7614 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -106,7 +106,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq);
+ return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq);
}
PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6983f13..9349acb 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -429,6 +429,10 @@ int pci_bus_numa_node(PCIBus *bus);
void pci_for_each_device(PCIBus *bus, int bus_num,
void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
void *opaque);
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+ void (*fn)(PCIBus *bus, PCIDevice *d,
+ void *opaque),
+ void *opaque);
void pci_for_each_bus_depth_first(PCIBus *bus,
void *(*begin)(PCIBus *bus, void *parent_state),
void (*end)(PCIBus *bus, void *state),
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d77ca60..d22ad8d 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -13,41 +13,84 @@
/* Device classes and subclasses */
-#define PCI_BASE_CLASS_STORAGE 0x01
-#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+#define PCI_BASE_CLASS_STORAGE 0x01
#define PCI_CLASS_STORAGE_SCSI 0x0100
#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_ATA 0x0105
#define PCI_CLASS_STORAGE_SATA 0x0106
+#define PCI_CLASS_STORAGE_SAS 0x0107
#define PCI_CLASS_STORAGE_EXPRESS 0x0108
#define PCI_CLASS_STORAGE_OTHER 0x0180
+#define PCI_BASE_CLASS_NETWORK 0x02
#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_ISDN 0x0204
+#define PCI_CLASS_NETWORK_WORLDFIP 0x0205
+#define PCI_CLASS_NETWORK_PICMG214 0x0206
#define PCI_CLASS_NETWORK_OTHER 0x0280
+#define PCI_BASE_CLASS_DISPLAY 0x03
#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
#define PCI_CLASS_DISPLAY_OTHER 0x0380
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+#define PCI_BASE_CLASS_MEMORY 0x05
#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
-#define PCI_CLASS_SYSTEM_SDHCI 0x0805
-#define PCI_CLASS_SYSTEM_OTHER 0x0880
-
-#define PCI_CLASS_SERIAL_USB 0x0c03
-#define PCI_CLASS_SERIAL_SMBUS 0x0c05
-
+#define PCI_BASE_CLASS_BRIDGE 0x06
#define PCI_CLASS_BRIDGE_HOST 0x0600
#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
#define PCI_CLASS_BRIDGE_PCI 0x0604
#define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_PCI_SEMITP 0x0609
+#define PCI_CLASS_BRIDGE_IB_PCI 0x060a
#define PCI_CLASS_BRIDGE_OTHER 0x0680
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_GPIB 0x0704
+#define PCI_CLASS_COMMUNICATION_SC 0x0705
#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010
+#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_SDHCI 0x0805
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
#define PCI_CLASS_INPUT_KEYBOARD 0x0900
#define PCI_CLASS_INPUT_PEN 0x0901
#define PCI_CLASS_INPUT_MOUSE 0x0902
@@ -55,8 +98,59 @@
#define PCI_CLASS_INPUT_GAMEPORT 0x0904
#define PCI_CLASS_INPUT_OTHER 0x0980
-#define PCI_CLASS_PROCESSOR_CO 0x0b40
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_PROCESSOR_CO 0x0b40
+
+#define PCI_BASE_CLASS_SERIAL 0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
+#define PCI_CLASS_SERIAL_ACCESS 0x0c01
+#define PCI_CLASS_SERIAL_SSA 0x0c02
+#define PCI_CLASS_SERIAL_USB 0x0c03
+#define PCI_CLASS_SERIAL_USB_UHCI 0x0c0300
+#define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310
+#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320
+#define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330
+#define PCI_CLASS_SERIAL_USB_UNKNOWN 0x0c0380
+#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe
+#define PCI_CLASS_SERIAL_FIBER 0x0c04
+#define PCI_CLASS_SERIAL_SMBUS 0x0c05
+#define PCI_CLASS_SERIAL_IB 0x0c06
+#define PCI_CLASS_SERIAL_IPMI 0x0c07
+#define PCI_CLASS_SERIAL_SERCOS 0x0c08
+#define PCI_CLASS_SERIAL_CANBUS 0x0c09
+
+#define PCI_BASE_CLASS_WIRELESS 0x0d
+#define PCI_CLASS_WIRELESS_IRDA 0x0d00
+#define PCI_CLASS_WIRELESS_CIR 0x0d01
+#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10
+#define PCI_CLASS_WIRELESS_BLUETOOTH 0x0d11
+#define PCI_CLASS_WIRELESS_BROADBAND 0x0d12
+#define PCI_CLASS_WIRELESS_OTHER 0x0d80
+
+#define PCI_BASE_CLASS_SATELLITE 0x0f
+#define PCI_CLASS_SATELLITE_TV 0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
+#define PCI_CLASS_SATELLITE_VOICE 0x0f03
+#define PCI_CLASS_SATELLITE_DATA 0x0f04
+
+#define PCI_BASE_CLASS_CRYPT 0x10
+#define PCI_CLASS_CRYPT_NETWORK 0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001
+#define PCI_CLASS_CRYPT_OTHER 0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO 0x1100
+#define PCI_CLASS_SP_PERF 0x1101
+#define PCI_CLASS_SP_SYNCH 0x1110
+#define PCI_CLASS_SP_MANAGEMENT 0x1120
+#define PCI_CLASS_SP_OTHER 0x1180
#define PCI_CLASS_OTHERS 0xff
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f9b17d8..cfd2711 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -58,7 +58,7 @@ struct sPAPRMachineState {
struct VIOsPAPRBus *vio_bus;
QLIST_HEAD(, sPAPRPHBState) phbs;
struct sPAPRNVRAM *nvram;
- XICSState *xics;
+ ICSState *ics;
DeviceState *rtc;
void *htab;
@@ -94,6 +94,9 @@ struct sPAPRMachineState {
/*< public >*/
char *kvm_type;
MemoryHotplugState hotplug_memory;
+
+ uint32_t nr_servers;
+ ICPState *icps;
};
#define H_SUCCESS 0
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index fc6f673..2e9685a 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- return xics_get_qirq(spapr->xics, dev->irq);
+ return xics_get_qirq(XICS_FABRIC(spapr), dev->irq);
}
static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 3f0c316..1945913 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -30,29 +30,6 @@
#include "hw/sysbus.h"
-#define TYPE_XICS_COMMON "xics-common"
-#define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON)
-
-/*
- * Retain xics as the type name to be compatible for migration. Rest all the
- * functions, class and variables are renamed as xics_spapr.
- */
-#define TYPE_XICS_SPAPR "xics"
-#define XICS_SPAPR(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR)
-
-#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
-#define XICS_SPAPR_KVM(obj) \
- OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
-
-#define XICS_COMMON_CLASS(klass) \
- OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
-#define XICS_SPAPR_CLASS(klass) \
- OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR)
-#define XICS_COMMON_GET_CLASS(obj) \
- OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
-#define XICS_SPAPR_GET_CLASS(obj) \
- OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
-
#define XICS_IPI 0x2
#define XICS_BUID 0x1
#define XICS_IRQ_BASE (XICS_BUID << 12)
@@ -62,31 +39,12 @@
* (the kernel implementation supports more but we don't exploit
* that yet)
*/
-typedef struct XICSStateClass XICSStateClass;
-typedef struct XICSState XICSState;
typedef struct ICPStateClass ICPStateClass;
typedef struct ICPState ICPState;
typedef struct ICSStateClass ICSStateClass;
typedef struct ICSState ICSState;
typedef struct ICSIRQState ICSIRQState;
-
-struct XICSStateClass {
- DeviceClass parent_class;
-
- void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu);
- void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp);
- void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp);
-};
-
-struct XICSState {
- /*< private >*/
- SysBusDevice parent_obj;
- /*< public >*/
- uint32_t nr_servers;
- uint32_t nr_irqs;
- ICPState *ss;
- QLIST_HEAD(, ICSState) ics;
-};
+typedef struct XICSFabric XICSFabric;
#define TYPE_ICP "icp"
#define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP)
@@ -104,6 +62,7 @@ struct ICPStateClass {
void (*pre_save)(ICPState *s);
int (*post_load)(ICPState *s, int version_id);
+ void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
};
struct ICPState {
@@ -118,7 +77,7 @@ struct ICPState {
qemu_irq output;
bool cap_irq_xics_enabled;
- XICSState *xics;
+ XICSFabric *xics;
};
#define TYPE_ICS_BASE "ics-base"
@@ -139,6 +98,7 @@ struct ICPState {
struct ICSStateClass {
DeviceClass parent_class;
+ void (*realize)(DeviceState *dev, Error **errp);
void (*pre_save)(ICSState *s);
int (*post_load)(ICSState *s, int version_id);
void (*reject)(ICSState *s, uint32_t irq);
@@ -154,8 +114,7 @@ struct ICSState {
uint32_t offset;
qemu_irq *qirqs;
ICSIRQState *irqs;
- XICSState *xics;
- QLIST_ENTRY(ICSState) list;
+ XICSFabric *xics;
};
static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
@@ -180,19 +139,37 @@ struct ICSIRQState {
uint8_t flags;
};
+typedef struct XICSFabric {
+ Object parent;
+} XICSFabric;
+
+#define TYPE_XICS_FABRIC "xics-fabric"
+#define XICS_FABRIC(obj) \
+ OBJECT_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_CLASS(klass) \
+ OBJECT_CLASS_CHECK(XICSFabricClass, (klass), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(XICSFabricClass, (obj), TYPE_XICS_FABRIC)
+
+typedef struct XICSFabricClass {
+ InterfaceClass parent;
+ ICSState *(*ics_get)(XICSFabric *xi, int irq);
+ void (*ics_resend)(XICSFabric *xi);
+ ICPState *(*icp_get)(XICSFabric *xi, int server);
+} XICSFabricClass;
+
#define XICS_IRQS_SPAPR 1024
-qemu_irq xics_get_qirq(XICSState *icp, int irq);
-int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp);
-int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align,
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp);
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align,
Error **errp);
-void xics_spapr_free(XICSState *icp, int irq, int num);
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle);
+void spapr_ics_free(ICSState *ics, int irq, int num);
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle);
-void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
-void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu);
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- const char *typename, Error **errp);
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq);
+ICPState *xics_icp_get(XICSFabric *xi, int server);
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu);
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu);
/* Internal XICS interfaces */
int xics_get_cpu_index_by_dt_id(int cpu_dt_id);
@@ -207,7 +184,15 @@ void ics_simple_write_xive(ICSState *ics, int nr, int server,
uint8_t priority, uint8_t saved_priority);
void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+void icp_pic_print_info(ICPState *icp, Monitor *mon);
+void ics_pic_print_info(ICSState *ics, Monitor *mon);
+
+void ics_resend(ICSState *ics);
+void icp_resend(ICPState *ss);
+
+typedef struct sPAPRMachineState sPAPRMachineState;
-ICSState *xics_find_source(XICSState *icp, int irq);
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp);
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp);
#endif /* XICS_H */
diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h
index 48cccbd..eafc3f0 100644
--- a/include/hw/ptimer.h
+++ b/include/hw/ptimer.h
@@ -60,6 +60,7 @@ typedef struct ptimer_state ptimer_state;
typedef void (*ptimer_cb)(void *opaque);
ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask);
+void ptimer_free(ptimer_state *s);
void ptimer_set_period(ptimer_state *s, int64_t period);
void ptimer_set_freq(ptimer_state *s, uint32_t freq);
uint64_t ptimer_get_limit(ptimer_state *s);
diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 79909b2..96caefe 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd);
bool sdbus_data_ready(SDBus *sd);
bool sdbus_get_inserted(SDBus *sd);
bool sdbus_get_readonly(SDBus *sd);
+/**
+ * sdbus_reparent_card: Reparent an SD card from one controller to another
+ * @from: controller bus to remove card from
+ * @to: controller bus to move card to
+ *
+ * Reparent an SD card, effectively unplugging it from one controller
+ * and inserting it into another. This is useful for SoCs like the
+ * bcm2835 which have two SD controllers and connect a single SD card
+ * to them, selected by the guest reprogramming GPIO line routing.
+ */
+void sdbus_reparent_card(SDBus *from, SDBus *to);
/* Functions to be used by SD devices to report back to qdevified controllers */
void sdbus_set_inserted(SDBus *sd, bool inserted);
diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h
new file mode 100644
index 0000000..cca04de
--- /dev/null
+++ b/include/hw/timer/armv7m_systick.h
@@ -0,0 +1,34 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#ifndef HW_TIMER_ARMV7M_SYSTICK_H
+#define HW_TIMER_ARMV7M_SYSTICK_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_SYSTICK "armv7m_systick"
+
+#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK)
+
+typedef struct SysTickState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ uint32_t control;
+ uint32_t reload;
+ int64_t tick;
+ QEMUTimer *timer;
+ MemoryRegion iomem;
+ qemu_irq irq;
+} SysTickState;
+
+#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1735d66..5720c88 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -22,6 +22,7 @@
#include "qapi-types.h"
#include "exec/cpu-common.h"
#include "qemu/coroutine_int.h"
+#include "qom/object.h"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
@@ -92,6 +93,7 @@ struct MigrationIncomingState {
*/
QemuEvent main_thread_load_event;
+ size_t largest_page_size;
bool have_fault_thread;
QemuThread fault_thread;
QemuSemaphore fault_thread_sem;
@@ -107,6 +109,7 @@ struct MigrationIncomingState {
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
+ void *postcopy_tmp_zero_page;
QEMUBH *bh;
@@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+int check_migratable(Object *obj, Error **err);
+
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
@@ -375,6 +380,7 @@ void global_state_store_running(void);
void flush_page_queue(MigrationState *ms);
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
ram_addr_t start, ram_addr_t len);
+uint64_t ram_pagesize_summary(void);
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index b6a7491f..8e036b9 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
/*
- * Discard the contents of 'length' bytes from 'start'
- * We can assume that if we've been called postcopy_ram_hosttest returned true
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length);
-
-/*
* Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
* however leaving it until after precopy means that most of the precopy
* data is still THPd
@@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms,
* to use other postcopy_ routines to allocate.
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize);
/*
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize);
/*
* Allocate a page of memory that can be mapped at a later point in time
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 63e7b02..f2dbf84 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16;
extern const VMStateInfo vmstate_info_uint32;
extern const VMStateInfo vmstate_info_uint64;
+/** Put this in the stream when migrating a null pointer.*/
+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
+extern const VMStateInfo vmstate_info_nullptr;
+
extern const VMStateInfo vmstate_info_float64;
extern const VMStateInfo vmstate_info_cpudouble;
diff --git a/include/qemu-io.h b/include/qemu-io.h
index 4d402b9..196fde0 100644
--- a/include/qemu-io.h
+++ b/include/qemu-io.h
@@ -36,6 +36,7 @@ typedef struct cmdinfo {
const char *args;
const char *oneline;
helpfunc_t help;
+ uint64_t perm;
} cmdinfo_t;
extern bool qemuio_misalign;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9abed51..26e6285 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -610,7 +610,10 @@ void timer_deinit(QEMUTimer *ts);
*
* Free a timer (it must not be on the active list)
*/
-void timer_free(QEMUTimer *ts);
+static inline void timer_free(QEMUTimer *ts)
+{
+ g_free(ts);
+}
/**
* timer_del:
diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h
index 47b38fb..eca9a2c 100644
--- a/include/standard-headers/asm-x86/hyperv.h
+++ b/include/standard-headers/asm-x86/hyperv.h
@@ -73,6 +73,9 @@
*/
#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
+/* Crash MSR available */
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+
/*
* Feature identification: EBX indicates which flags were specified at
* partition creation. The format is the same as the partition creation
@@ -144,6 +147,11 @@
*/
#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
+/*
+ * Crash notification flag.
+ */
+#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
+
/* MSR used to identify the guest OS. */
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 5c10f7e..c8b3338 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -640,7 +640,7 @@
* Control a data application associated with the currently viewed channel,
* e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.)
*/
-#define KEY_DATA 0x275
+#define KEY_DATA 0x277
#define BTN_TRIGGER_HAPPY 0x2c0
#define BTN_TRIGGER_HAPPY1 0x2c0
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index e5a2e68..634c9c4 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -23,6 +23,14 @@
#define LINUX_PCI_REGS_H
/*
+ * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
+ * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of
+ * configuration space.
+ */
+#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096
+
+/*
* Under PCI, each device has 256 bytes of configuration address space,
* of which the first 64 bytes are standardized as follows:
*/
@@ -674,6 +682,7 @@
#define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */
#define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */
#define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */
+#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */
#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM
@@ -965,6 +974,7 @@
#define PCI_EXP_DPC_STATUS 8 /* DPC Status */
#define PCI_EXP_DPC_STATUS_TRIGGER 0x01 /* Trigger Status */
#define PCI_EXP_DPC_STATUS_INTERRUPT 0x08 /* Interrupt Status */
+#define PCI_EXP_DPC_RP_BUSY 0x10 /* Root Port Busy */
#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */
@@ -977,4 +987,19 @@
#define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */
#define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */
+/* L1 PM Substates */
+#define PCI_L1SS_CAP 4 /* capability register */
+#define PCI_L1SS_CAP_PCIPM_L1_2 1 /* PCI PM L1.2 Support */
+#define PCI_L1SS_CAP_PCIPM_L1_1 2 /* PCI PM L1.1 Support */
+#define PCI_L1SS_CAP_ASPM_L1_2 4 /* ASPM L1.2 Support */
+#define PCI_L1SS_CAP_ASPM_L1_1 8 /* ASPM L1.1 Support */
+#define PCI_L1SS_CAP_L1_PM_SS 16 /* L1 PM Substates Support */
+#define PCI_L1SS_CTL1 8 /* Control Register 1 */
+#define PCI_L1SS_CTL1_PCIPM_L1_2 1 /* PCI PM L1.2 Enable */
+#define PCI_L1SS_CTL1_PCIPM_L1_1 2 /* PCI PM L1.1 Support */
+#define PCI_L1SS_CTL1_ASPM_L1_2 4 /* ASPM L1.2 Support */
+#define PCI_L1SS_CTL1_ASPM_L1_1 8 /* ASPM L1.1 Support */
+#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000F
+#define PCI_L1SS_CTL2 0xC /* Control Register 2 */
+
#endif /* LINUX_PCI_REGS_H */
diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
index fe74e42..6d5c3b2 100644
--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
@@ -43,4 +43,5 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
+
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index f365a51..096c17f 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -34,7 +34,7 @@ typedef struct BlockDevOps {
* changes. Sure would be useful if it did.
* Device models with removable media must implement this callback.
*/
- void (*change_media_cb)(void *opaque, bool load);
+ void (*change_media_cb)(void *opaque, bool load, Error **errp);
/*
* Runs when an eject request is issued from the monitor, the tray
* is closed, and the medium is locked.
@@ -84,7 +84,7 @@ typedef struct BlockBackendPublic {
QLIST_ENTRY(BlockBackendPublic) round_robin;
} BlockBackendPublic;
-BlockBackend *blk_new(void);
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm);
BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
int blk_get_refcnt(BlockBackend *blk);
@@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public);
BlockDriverState *blk_bs(BlockBackend *blk);
void blk_remove_bs(BlockBackend *blk);
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
bool bdrv_has_blk(BlockDriverState *bs);
bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_iostatus_enable(BlockBackend *blk);
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 2fb7859..1101d55 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
/* Supported VGICv3 address types */
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
@@ -179,10 +181,23 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h
new file mode 100644
index 0000000..13a74af
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-common.h
@@ -0,0 +1,357 @@
+#ifndef _ASM_ARM_UNISTD_COMMON_H
+#define _ASM_ARM_UNISTD_COMMON_H 1
+
+#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0)
+#define __NR_exit (__NR_SYSCALL_BASE + 1)
+#define __NR_fork (__NR_SYSCALL_BASE + 2)
+#define __NR_read (__NR_SYSCALL_BASE + 3)
+#define __NR_write (__NR_SYSCALL_BASE + 4)
+#define __NR_open (__NR_SYSCALL_BASE + 5)
+#define __NR_close (__NR_SYSCALL_BASE + 6)
+#define __NR_creat (__NR_SYSCALL_BASE + 8)
+#define __NR_link (__NR_SYSCALL_BASE + 9)
+#define __NR_unlink (__NR_SYSCALL_BASE + 10)
+#define __NR_execve (__NR_SYSCALL_BASE + 11)
+#define __NR_chdir (__NR_SYSCALL_BASE + 12)
+#define __NR_mknod (__NR_SYSCALL_BASE + 14)
+#define __NR_chmod (__NR_SYSCALL_BASE + 15)
+#define __NR_lchown (__NR_SYSCALL_BASE + 16)
+#define __NR_lseek (__NR_SYSCALL_BASE + 19)
+#define __NR_getpid (__NR_SYSCALL_BASE + 20)
+#define __NR_mount (__NR_SYSCALL_BASE + 21)
+#define __NR_setuid (__NR_SYSCALL_BASE + 23)
+#define __NR_getuid (__NR_SYSCALL_BASE + 24)
+#define __NR_ptrace (__NR_SYSCALL_BASE + 26)
+#define __NR_pause (__NR_SYSCALL_BASE + 29)
+#define __NR_access (__NR_SYSCALL_BASE + 33)
+#define __NR_nice (__NR_SYSCALL_BASE + 34)
+#define __NR_sync (__NR_SYSCALL_BASE + 36)
+#define __NR_kill (__NR_SYSCALL_BASE + 37)
+#define __NR_rename (__NR_SYSCALL_BASE + 38)
+#define __NR_mkdir (__NR_SYSCALL_BASE + 39)
+#define __NR_rmdir (__NR_SYSCALL_BASE + 40)
+#define __NR_dup (__NR_SYSCALL_BASE + 41)
+#define __NR_pipe (__NR_SYSCALL_BASE + 42)
+#define __NR_times (__NR_SYSCALL_BASE + 43)
+#define __NR_brk (__NR_SYSCALL_BASE + 45)
+#define __NR_setgid (__NR_SYSCALL_BASE + 46)
+#define __NR_getgid (__NR_SYSCALL_BASE + 47)
+#define __NR_geteuid (__NR_SYSCALL_BASE + 49)
+#define __NR_getegid (__NR_SYSCALL_BASE + 50)
+#define __NR_acct (__NR_SYSCALL_BASE + 51)
+#define __NR_umount2 (__NR_SYSCALL_BASE + 52)
+#define __NR_ioctl (__NR_SYSCALL_BASE + 54)
+#define __NR_fcntl (__NR_SYSCALL_BASE + 55)
+#define __NR_setpgid (__NR_SYSCALL_BASE + 57)
+#define __NR_umask (__NR_SYSCALL_BASE + 60)
+#define __NR_chroot (__NR_SYSCALL_BASE + 61)
+#define __NR_ustat (__NR_SYSCALL_BASE + 62)
+#define __NR_dup2 (__NR_SYSCALL_BASE + 63)
+#define __NR_getppid (__NR_SYSCALL_BASE + 64)
+#define __NR_getpgrp (__NR_SYSCALL_BASE + 65)
+#define __NR_setsid (__NR_SYSCALL_BASE + 66)
+#define __NR_sigaction (__NR_SYSCALL_BASE + 67)
+#define __NR_setreuid (__NR_SYSCALL_BASE + 70)
+#define __NR_setregid (__NR_SYSCALL_BASE + 71)
+#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72)
+#define __NR_sigpending (__NR_SYSCALL_BASE + 73)
+#define __NR_sethostname (__NR_SYSCALL_BASE + 74)
+#define __NR_setrlimit (__NR_SYSCALL_BASE + 75)
+#define __NR_getrusage (__NR_SYSCALL_BASE + 77)
+#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78)
+#define __NR_settimeofday (__NR_SYSCALL_BASE + 79)
+#define __NR_getgroups (__NR_SYSCALL_BASE + 80)
+#define __NR_setgroups (__NR_SYSCALL_BASE + 81)
+#define __NR_symlink (__NR_SYSCALL_BASE + 83)
+#define __NR_readlink (__NR_SYSCALL_BASE + 85)
+#define __NR_uselib (__NR_SYSCALL_BASE + 86)
+#define __NR_swapon (__NR_SYSCALL_BASE + 87)
+#define __NR_reboot (__NR_SYSCALL_BASE + 88)
+#define __NR_munmap (__NR_SYSCALL_BASE + 91)
+#define __NR_truncate (__NR_SYSCALL_BASE + 92)
+#define __NR_ftruncate (__NR_SYSCALL_BASE + 93)
+#define __NR_fchmod (__NR_SYSCALL_BASE + 94)
+#define __NR_fchown (__NR_SYSCALL_BASE + 95)
+#define __NR_getpriority (__NR_SYSCALL_BASE + 96)
+#define __NR_setpriority (__NR_SYSCALL_BASE + 97)
+#define __NR_statfs (__NR_SYSCALL_BASE + 99)
+#define __NR_fstatfs (__NR_SYSCALL_BASE + 100)
+#define __NR_syslog (__NR_SYSCALL_BASE + 103)
+#define __NR_setitimer (__NR_SYSCALL_BASE + 104)
+#define __NR_getitimer (__NR_SYSCALL_BASE + 105)
+#define __NR_stat (__NR_SYSCALL_BASE + 106)
+#define __NR_lstat (__NR_SYSCALL_BASE + 107)
+#define __NR_fstat (__NR_SYSCALL_BASE + 108)
+#define __NR_vhangup (__NR_SYSCALL_BASE + 111)
+#define __NR_wait4 (__NR_SYSCALL_BASE + 114)
+#define __NR_swapoff (__NR_SYSCALL_BASE + 115)
+#define __NR_sysinfo (__NR_SYSCALL_BASE + 116)
+#define __NR_fsync (__NR_SYSCALL_BASE + 118)
+#define __NR_sigreturn (__NR_SYSCALL_BASE + 119)
+#define __NR_clone (__NR_SYSCALL_BASE + 120)
+#define __NR_setdomainname (__NR_SYSCALL_BASE + 121)
+#define __NR_uname (__NR_SYSCALL_BASE + 122)
+#define __NR_adjtimex (__NR_SYSCALL_BASE + 124)
+#define __NR_mprotect (__NR_SYSCALL_BASE + 125)
+#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126)
+#define __NR_init_module (__NR_SYSCALL_BASE + 128)
+#define __NR_delete_module (__NR_SYSCALL_BASE + 129)
+#define __NR_quotactl (__NR_SYSCALL_BASE + 131)
+#define __NR_getpgid (__NR_SYSCALL_BASE + 132)
+#define __NR_fchdir (__NR_SYSCALL_BASE + 133)
+#define __NR_bdflush (__NR_SYSCALL_BASE + 134)
+#define __NR_sysfs (__NR_SYSCALL_BASE + 135)
+#define __NR_personality (__NR_SYSCALL_BASE + 136)
+#define __NR_setfsuid (__NR_SYSCALL_BASE + 138)
+#define __NR_setfsgid (__NR_SYSCALL_BASE + 139)
+#define __NR__llseek (__NR_SYSCALL_BASE + 140)
+#define __NR_getdents (__NR_SYSCALL_BASE + 141)
+#define __NR__newselect (__NR_SYSCALL_BASE + 142)
+#define __NR_flock (__NR_SYSCALL_BASE + 143)
+#define __NR_msync (__NR_SYSCALL_BASE + 144)
+#define __NR_readv (__NR_SYSCALL_BASE + 145)
+#define __NR_writev (__NR_SYSCALL_BASE + 146)
+#define __NR_getsid (__NR_SYSCALL_BASE + 147)
+#define __NR_fdatasync (__NR_SYSCALL_BASE + 148)
+#define __NR__sysctl (__NR_SYSCALL_BASE + 149)
+#define __NR_mlock (__NR_SYSCALL_BASE + 150)
+#define __NR_munlock (__NR_SYSCALL_BASE + 151)
+#define __NR_mlockall (__NR_SYSCALL_BASE + 152)
+#define __NR_munlockall (__NR_SYSCALL_BASE + 153)
+#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154)
+#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155)
+#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156)
+#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157)
+#define __NR_sched_yield (__NR_SYSCALL_BASE + 158)
+#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159)
+#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160)
+#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161)
+#define __NR_nanosleep (__NR_SYSCALL_BASE + 162)
+#define __NR_mremap (__NR_SYSCALL_BASE + 163)
+#define __NR_setresuid (__NR_SYSCALL_BASE + 164)
+#define __NR_getresuid (__NR_SYSCALL_BASE + 165)
+#define __NR_poll (__NR_SYSCALL_BASE + 168)
+#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169)
+#define __NR_setresgid (__NR_SYSCALL_BASE + 170)
+#define __NR_getresgid (__NR_SYSCALL_BASE + 171)
+#define __NR_prctl (__NR_SYSCALL_BASE + 172)
+#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173)
+#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176)
+#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177)
+#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178)
+#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179)
+#define __NR_pread64 (__NR_SYSCALL_BASE + 180)
+#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181)
+#define __NR_chown (__NR_SYSCALL_BASE + 182)
+#define __NR_getcwd (__NR_SYSCALL_BASE + 183)
+#define __NR_capget (__NR_SYSCALL_BASE + 184)
+#define __NR_capset (__NR_SYSCALL_BASE + 185)
+#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186)
+#define __NR_sendfile (__NR_SYSCALL_BASE + 187)
+#define __NR_vfork (__NR_SYSCALL_BASE + 190)
+#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191)
+#define __NR_mmap2 (__NR_SYSCALL_BASE + 192)
+#define __NR_truncate64 (__NR_SYSCALL_BASE + 193)
+#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194)
+#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
+#define __NR_lstat64 (__NR_SYSCALL_BASE + 196)
+#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
+#define __NR_lchown32 (__NR_SYSCALL_BASE + 198)
+#define __NR_getuid32 (__NR_SYSCALL_BASE + 199)
+#define __NR_getgid32 (__NR_SYSCALL_BASE + 200)
+#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201)
+#define __NR_getegid32 (__NR_SYSCALL_BASE + 202)
+#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203)
+#define __NR_setregid32 (__NR_SYSCALL_BASE + 204)
+#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205)
+#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206)
+#define __NR_fchown32 (__NR_SYSCALL_BASE + 207)
+#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208)
+#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209)
+#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210)
+#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211)
+#define __NR_chown32 (__NR_SYSCALL_BASE + 212)
+#define __NR_setuid32 (__NR_SYSCALL_BASE + 213)
+#define __NR_setgid32 (__NR_SYSCALL_BASE + 214)
+#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215)
+#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216)
+#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
+#define __NR_pivot_root (__NR_SYSCALL_BASE + 218)
+#define __NR_mincore (__NR_SYSCALL_BASE + 219)
+#define __NR_madvise (__NR_SYSCALL_BASE + 220)
+#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221)
+#define __NR_gettid (__NR_SYSCALL_BASE + 224)
+#define __NR_readahead (__NR_SYSCALL_BASE + 225)
+#define __NR_setxattr (__NR_SYSCALL_BASE + 226)
+#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227)
+#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228)
+#define __NR_getxattr (__NR_SYSCALL_BASE + 229)
+#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230)
+#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231)
+#define __NR_listxattr (__NR_SYSCALL_BASE + 232)
+#define __NR_llistxattr (__NR_SYSCALL_BASE + 233)
+#define __NR_flistxattr (__NR_SYSCALL_BASE + 234)
+#define __NR_removexattr (__NR_SYSCALL_BASE + 235)
+#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236)
+#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237)
+#define __NR_tkill (__NR_SYSCALL_BASE + 238)
+#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239)
+#define __NR_futex (__NR_SYSCALL_BASE + 240)
+#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
+#define __NR_io_setup (__NR_SYSCALL_BASE + 243)
+#define __NR_io_destroy (__NR_SYSCALL_BASE + 244)
+#define __NR_io_getevents (__NR_SYSCALL_BASE + 245)
+#define __NR_io_submit (__NR_SYSCALL_BASE + 246)
+#define __NR_io_cancel (__NR_SYSCALL_BASE + 247)
+#define __NR_exit_group (__NR_SYSCALL_BASE + 248)
+#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249)
+#define __NR_epoll_create (__NR_SYSCALL_BASE + 250)
+#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251)
+#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252)
+#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253)
+#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256)
+#define __NR_timer_create (__NR_SYSCALL_BASE + 257)
+#define __NR_timer_settime (__NR_SYSCALL_BASE + 258)
+#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259)
+#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260)
+#define __NR_timer_delete (__NR_SYSCALL_BASE + 261)
+#define __NR_clock_settime (__NR_SYSCALL_BASE + 262)
+#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263)
+#define __NR_clock_getres (__NR_SYSCALL_BASE + 264)
+#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265)
+#define __NR_statfs64 (__NR_SYSCALL_BASE + 266)
+#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267)
+#define __NR_tgkill (__NR_SYSCALL_BASE + 268)
+#define __NR_utimes (__NR_SYSCALL_BASE + 269)
+#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270)
+#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271)
+#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272)
+#define __NR_pciconfig_write (__NR_SYSCALL_BASE + 273)
+#define __NR_mq_open (__NR_SYSCALL_BASE + 274)
+#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275)
+#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276)
+#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277)
+#define __NR_mq_notify (__NR_SYSCALL_BASE + 278)
+#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279)
+#define __NR_waitid (__NR_SYSCALL_BASE + 280)
+#define __NR_socket (__NR_SYSCALL_BASE + 281)
+#define __NR_bind (__NR_SYSCALL_BASE + 282)
+#define __NR_connect (__NR_SYSCALL_BASE + 283)
+#define __NR_listen (__NR_SYSCALL_BASE + 284)
+#define __NR_accept (__NR_SYSCALL_BASE + 285)
+#define __NR_getsockname (__NR_SYSCALL_BASE + 286)
+#define __NR_getpeername (__NR_SYSCALL_BASE + 287)
+#define __NR_socketpair (__NR_SYSCALL_BASE + 288)
+#define __NR_send (__NR_SYSCALL_BASE + 289)
+#define __NR_sendto (__NR_SYSCALL_BASE + 290)
+#define __NR_recv (__NR_SYSCALL_BASE + 291)
+#define __NR_recvfrom (__NR_SYSCALL_BASE + 292)
+#define __NR_shutdown (__NR_SYSCALL_BASE + 293)
+#define __NR_setsockopt (__NR_SYSCALL_BASE + 294)
+#define __NR_getsockopt (__NR_SYSCALL_BASE + 295)
+#define __NR_sendmsg (__NR_SYSCALL_BASE + 296)
+#define __NR_recvmsg (__NR_SYSCALL_BASE + 297)
+#define __NR_semop (__NR_SYSCALL_BASE + 298)
+#define __NR_semget (__NR_SYSCALL_BASE + 299)
+#define __NR_semctl (__NR_SYSCALL_BASE + 300)
+#define __NR_msgsnd (__NR_SYSCALL_BASE + 301)
+#define __NR_msgrcv (__NR_SYSCALL_BASE + 302)
+#define __NR_msgget (__NR_SYSCALL_BASE + 303)
+#define __NR_msgctl (__NR_SYSCALL_BASE + 304)
+#define __NR_shmat (__NR_SYSCALL_BASE + 305)
+#define __NR_shmdt (__NR_SYSCALL_BASE + 306)
+#define __NR_shmget (__NR_SYSCALL_BASE + 307)
+#define __NR_shmctl (__NR_SYSCALL_BASE + 308)
+#define __NR_add_key (__NR_SYSCALL_BASE + 309)
+#define __NR_request_key (__NR_SYSCALL_BASE + 310)
+#define __NR_keyctl (__NR_SYSCALL_BASE + 311)
+#define __NR_semtimedop (__NR_SYSCALL_BASE + 312)
+#define __NR_vserver (__NR_SYSCALL_BASE + 313)
+#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314)
+#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315)
+#define __NR_inotify_init (__NR_SYSCALL_BASE + 316)
+#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317)
+#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318)
+#define __NR_mbind (__NR_SYSCALL_BASE + 319)
+#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320)
+#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321)
+#define __NR_openat (__NR_SYSCALL_BASE + 322)
+#define __NR_mkdirat (__NR_SYSCALL_BASE + 323)
+#define __NR_mknodat (__NR_SYSCALL_BASE + 324)
+#define __NR_fchownat (__NR_SYSCALL_BASE + 325)
+#define __NR_futimesat (__NR_SYSCALL_BASE + 326)
+#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327)
+#define __NR_unlinkat (__NR_SYSCALL_BASE + 328)
+#define __NR_renameat (__NR_SYSCALL_BASE + 329)
+#define __NR_linkat (__NR_SYSCALL_BASE + 330)
+#define __NR_symlinkat (__NR_SYSCALL_BASE + 331)
+#define __NR_readlinkat (__NR_SYSCALL_BASE + 332)
+#define __NR_fchmodat (__NR_SYSCALL_BASE + 333)
+#define __NR_faccessat (__NR_SYSCALL_BASE + 334)
+#define __NR_pselect6 (__NR_SYSCALL_BASE + 335)
+#define __NR_ppoll (__NR_SYSCALL_BASE + 336)
+#define __NR_unshare (__NR_SYSCALL_BASE + 337)
+#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338)
+#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339)
+#define __NR_splice (__NR_SYSCALL_BASE + 340)
+#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341)
+#define __NR_tee (__NR_SYSCALL_BASE + 342)
+#define __NR_vmsplice (__NR_SYSCALL_BASE + 343)
+#define __NR_move_pages (__NR_SYSCALL_BASE + 344)
+#define __NR_getcpu (__NR_SYSCALL_BASE + 345)
+#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346)
+#define __NR_kexec_load (__NR_SYSCALL_BASE + 347)
+#define __NR_utimensat (__NR_SYSCALL_BASE + 348)
+#define __NR_signalfd (__NR_SYSCALL_BASE + 349)
+#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350)
+#define __NR_eventfd (__NR_SYSCALL_BASE + 351)
+#define __NR_fallocate (__NR_SYSCALL_BASE + 352)
+#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353)
+#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354)
+#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355)
+#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356)
+#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357)
+#define __NR_dup3 (__NR_SYSCALL_BASE + 358)
+#define __NR_pipe2 (__NR_SYSCALL_BASE + 359)
+#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360)
+#define __NR_preadv (__NR_SYSCALL_BASE + 361)
+#define __NR_pwritev (__NR_SYSCALL_BASE + 362)
+#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363)
+#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364)
+#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365)
+#define __NR_accept4 (__NR_SYSCALL_BASE + 366)
+#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367)
+#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368)
+#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369)
+#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370)
+#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371)
+#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372)
+#define __NR_syncfs (__NR_SYSCALL_BASE + 373)
+#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374)
+#define __NR_setns (__NR_SYSCALL_BASE + 375)
+#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376)
+#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377)
+#define __NR_kcmp (__NR_SYSCALL_BASE + 378)
+#define __NR_finit_module (__NR_SYSCALL_BASE + 379)
+#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380)
+#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381)
+#define __NR_renameat2 (__NR_SYSCALL_BASE + 382)
+#define __NR_seccomp (__NR_SYSCALL_BASE + 383)
+#define __NR_getrandom (__NR_SYSCALL_BASE + 384)
+#define __NR_memfd_create (__NR_SYSCALL_BASE + 385)
+#define __NR_bpf (__NR_SYSCALL_BASE + 386)
+#define __NR_execveat (__NR_SYSCALL_BASE + 387)
+#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388)
+#define __NR_membarrier (__NR_SYSCALL_BASE + 389)
+#define __NR_mlock2 (__NR_SYSCALL_BASE + 390)
+#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391)
+#define __NR_preadv2 (__NR_SYSCALL_BASE + 392)
+#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393)
+#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394)
+#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395)
+#define __NR_pkey_free (__NR_SYSCALL_BASE + 396)
+
+#endif /* _ASM_ARM_UNISTD_COMMON_H */
diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h
new file mode 100644
index 0000000..266f1fc
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-eabi.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_ARM_UNISTD_EABI_H
+#define _ASM_ARM_UNISTD_EABI_H 1
+
+
+#endif /* _ASM_ARM_UNISTD_EABI_H */
diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h
new file mode 100644
index 0000000..47d9afb
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-oabi.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_ARM_UNISTD_OABI_H
+#define _ASM_ARM_UNISTD_OABI_H 1
+
+#define __NR_time (__NR_SYSCALL_BASE + 13)
+#define __NR_umount (__NR_SYSCALL_BASE + 22)
+#define __NR_stime (__NR_SYSCALL_BASE + 25)
+#define __NR_alarm (__NR_SYSCALL_BASE + 27)
+#define __NR_utime (__NR_SYSCALL_BASE + 30)
+#define __NR_getrlimit (__NR_SYSCALL_BASE + 76)
+#define __NR_select (__NR_SYSCALL_BASE + 82)
+#define __NR_readdir (__NR_SYSCALL_BASE + 89)
+#define __NR_mmap (__NR_SYSCALL_BASE + 90)
+#define __NR_socketcall (__NR_SYSCALL_BASE + 102)
+#define __NR_syscall (__NR_SYSCALL_BASE + 113)
+#define __NR_ipc (__NR_SYSCALL_BASE + 117)
+
+#endif /* _ASM_ARM_UNISTD_OABI_H */
diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h
index ceb5450..155571b 100644
--- a/linux-headers/asm-arm/unistd.h
+++ b/linux-headers/asm-arm/unistd.h
@@ -17,409 +17,14 @@
#if defined(__thumb__) || defined(__ARM_EABI__)
#define __NR_SYSCALL_BASE 0
+#include <asm/unistd-eabi.h>
#else
#define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE
+#include <asm/unistd-oabi.h>
#endif
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_restart_syscall (__NR_SYSCALL_BASE+ 0)
-#define __NR_exit (__NR_SYSCALL_BASE+ 1)
-#define __NR_fork (__NR_SYSCALL_BASE+ 2)
-#define __NR_read (__NR_SYSCALL_BASE+ 3)
-#define __NR_write (__NR_SYSCALL_BASE+ 4)
-#define __NR_open (__NR_SYSCALL_BASE+ 5)
-#define __NR_close (__NR_SYSCALL_BASE+ 6)
- /* 7 was sys_waitpid */
-#define __NR_creat (__NR_SYSCALL_BASE+ 8)
-#define __NR_link (__NR_SYSCALL_BASE+ 9)
-#define __NR_unlink (__NR_SYSCALL_BASE+ 10)
-#define __NR_execve (__NR_SYSCALL_BASE+ 11)
-#define __NR_chdir (__NR_SYSCALL_BASE+ 12)
-#define __NR_time (__NR_SYSCALL_BASE+ 13)
-#define __NR_mknod (__NR_SYSCALL_BASE+ 14)
-#define __NR_chmod (__NR_SYSCALL_BASE+ 15)
-#define __NR_lchown (__NR_SYSCALL_BASE+ 16)
- /* 17 was sys_break */
- /* 18 was sys_stat */
-#define __NR_lseek (__NR_SYSCALL_BASE+ 19)
-#define __NR_getpid (__NR_SYSCALL_BASE+ 20)
-#define __NR_mount (__NR_SYSCALL_BASE+ 21)
-#define __NR_umount (__NR_SYSCALL_BASE+ 22)
-#define __NR_setuid (__NR_SYSCALL_BASE+ 23)
-#define __NR_getuid (__NR_SYSCALL_BASE+ 24)
-#define __NR_stime (__NR_SYSCALL_BASE+ 25)
-#define __NR_ptrace (__NR_SYSCALL_BASE+ 26)
-#define __NR_alarm (__NR_SYSCALL_BASE+ 27)
- /* 28 was sys_fstat */
-#define __NR_pause (__NR_SYSCALL_BASE+ 29)
-#define __NR_utime (__NR_SYSCALL_BASE+ 30)
- /* 31 was sys_stty */
- /* 32 was sys_gtty */
-#define __NR_access (__NR_SYSCALL_BASE+ 33)
-#define __NR_nice (__NR_SYSCALL_BASE+ 34)
- /* 35 was sys_ftime */
-#define __NR_sync (__NR_SYSCALL_BASE+ 36)
-#define __NR_kill (__NR_SYSCALL_BASE+ 37)
-#define __NR_rename (__NR_SYSCALL_BASE+ 38)
-#define __NR_mkdir (__NR_SYSCALL_BASE+ 39)
-#define __NR_rmdir (__NR_SYSCALL_BASE+ 40)
-#define __NR_dup (__NR_SYSCALL_BASE+ 41)
-#define __NR_pipe (__NR_SYSCALL_BASE+ 42)
-#define __NR_times (__NR_SYSCALL_BASE+ 43)
- /* 44 was sys_prof */
-#define __NR_brk (__NR_SYSCALL_BASE+ 45)
-#define __NR_setgid (__NR_SYSCALL_BASE+ 46)
-#define __NR_getgid (__NR_SYSCALL_BASE+ 47)
- /* 48 was sys_signal */
-#define __NR_geteuid (__NR_SYSCALL_BASE+ 49)
-#define __NR_getegid (__NR_SYSCALL_BASE+ 50)
-#define __NR_acct (__NR_SYSCALL_BASE+ 51)
-#define __NR_umount2 (__NR_SYSCALL_BASE+ 52)
- /* 53 was sys_lock */
-#define __NR_ioctl (__NR_SYSCALL_BASE+ 54)
-#define __NR_fcntl (__NR_SYSCALL_BASE+ 55)
- /* 56 was sys_mpx */
-#define __NR_setpgid (__NR_SYSCALL_BASE+ 57)
- /* 58 was sys_ulimit */
- /* 59 was sys_olduname */
-#define __NR_umask (__NR_SYSCALL_BASE+ 60)
-#define __NR_chroot (__NR_SYSCALL_BASE+ 61)
-#define __NR_ustat (__NR_SYSCALL_BASE+ 62)
-#define __NR_dup2 (__NR_SYSCALL_BASE+ 63)
-#define __NR_getppid (__NR_SYSCALL_BASE+ 64)
-#define __NR_getpgrp (__NR_SYSCALL_BASE+ 65)
-#define __NR_setsid (__NR_SYSCALL_BASE+ 66)
-#define __NR_sigaction (__NR_SYSCALL_BASE+ 67)
- /* 68 was sys_sgetmask */
- /* 69 was sys_ssetmask */
-#define __NR_setreuid (__NR_SYSCALL_BASE+ 70)
-#define __NR_setregid (__NR_SYSCALL_BASE+ 71)
-#define __NR_sigsuspend (__NR_SYSCALL_BASE+ 72)
-#define __NR_sigpending (__NR_SYSCALL_BASE+ 73)
-#define __NR_sethostname (__NR_SYSCALL_BASE+ 74)
-#define __NR_setrlimit (__NR_SYSCALL_BASE+ 75)
-#define __NR_getrlimit (__NR_SYSCALL_BASE+ 76) /* Back compat 2GB limited rlimit */
-#define __NR_getrusage (__NR_SYSCALL_BASE+ 77)
-#define __NR_gettimeofday (__NR_SYSCALL_BASE+ 78)
-#define __NR_settimeofday (__NR_SYSCALL_BASE+ 79)
-#define __NR_getgroups (__NR_SYSCALL_BASE+ 80)
-#define __NR_setgroups (__NR_SYSCALL_BASE+ 81)
-#define __NR_select (__NR_SYSCALL_BASE+ 82)
-#define __NR_symlink (__NR_SYSCALL_BASE+ 83)
- /* 84 was sys_lstat */
-#define __NR_readlink (__NR_SYSCALL_BASE+ 85)
-#define __NR_uselib (__NR_SYSCALL_BASE+ 86)
-#define __NR_swapon (__NR_SYSCALL_BASE+ 87)
-#define __NR_reboot (__NR_SYSCALL_BASE+ 88)
-#define __NR_readdir (__NR_SYSCALL_BASE+ 89)
-#define __NR_mmap (__NR_SYSCALL_BASE+ 90)
-#define __NR_munmap (__NR_SYSCALL_BASE+ 91)
-#define __NR_truncate (__NR_SYSCALL_BASE+ 92)
-#define __NR_ftruncate (__NR_SYSCALL_BASE+ 93)
-#define __NR_fchmod (__NR_SYSCALL_BASE+ 94)
-#define __NR_fchown (__NR_SYSCALL_BASE+ 95)
-#define __NR_getpriority (__NR_SYSCALL_BASE+ 96)
-#define __NR_setpriority (__NR_SYSCALL_BASE+ 97)
- /* 98 was sys_profil */
-#define __NR_statfs (__NR_SYSCALL_BASE+ 99)
-#define __NR_fstatfs (__NR_SYSCALL_BASE+100)
- /* 101 was sys_ioperm */
-#define __NR_socketcall (__NR_SYSCALL_BASE+102)
-#define __NR_syslog (__NR_SYSCALL_BASE+103)
-#define __NR_setitimer (__NR_SYSCALL_BASE+104)
-#define __NR_getitimer (__NR_SYSCALL_BASE+105)
-#define __NR_stat (__NR_SYSCALL_BASE+106)
-#define __NR_lstat (__NR_SYSCALL_BASE+107)
-#define __NR_fstat (__NR_SYSCALL_BASE+108)
- /* 109 was sys_uname */
- /* 110 was sys_iopl */
-#define __NR_vhangup (__NR_SYSCALL_BASE+111)
- /* 112 was sys_idle */
-#define __NR_syscall (__NR_SYSCALL_BASE+113) /* syscall to call a syscall! */
-#define __NR_wait4 (__NR_SYSCALL_BASE+114)
-#define __NR_swapoff (__NR_SYSCALL_BASE+115)
-#define __NR_sysinfo (__NR_SYSCALL_BASE+116)
-#define __NR_ipc (__NR_SYSCALL_BASE+117)
-#define __NR_fsync (__NR_SYSCALL_BASE+118)
-#define __NR_sigreturn (__NR_SYSCALL_BASE+119)
-#define __NR_clone (__NR_SYSCALL_BASE+120)
-#define __NR_setdomainname (__NR_SYSCALL_BASE+121)
-#define __NR_uname (__NR_SYSCALL_BASE+122)
- /* 123 was sys_modify_ldt */
-#define __NR_adjtimex (__NR_SYSCALL_BASE+124)
-#define __NR_mprotect (__NR_SYSCALL_BASE+125)
-#define __NR_sigprocmask (__NR_SYSCALL_BASE+126)
- /* 127 was sys_create_module */
-#define __NR_init_module (__NR_SYSCALL_BASE+128)
-#define __NR_delete_module (__NR_SYSCALL_BASE+129)
- /* 130 was sys_get_kernel_syms */
-#define __NR_quotactl (__NR_SYSCALL_BASE+131)
-#define __NR_getpgid (__NR_SYSCALL_BASE+132)
-#define __NR_fchdir (__NR_SYSCALL_BASE+133)
-#define __NR_bdflush (__NR_SYSCALL_BASE+134)
-#define __NR_sysfs (__NR_SYSCALL_BASE+135)
-#define __NR_personality (__NR_SYSCALL_BASE+136)
- /* 137 was sys_afs_syscall */
-#define __NR_setfsuid (__NR_SYSCALL_BASE+138)
-#define __NR_setfsgid (__NR_SYSCALL_BASE+139)
-#define __NR__llseek (__NR_SYSCALL_BASE+140)
-#define __NR_getdents (__NR_SYSCALL_BASE+141)
-#define __NR__newselect (__NR_SYSCALL_BASE+142)
-#define __NR_flock (__NR_SYSCALL_BASE+143)
-#define __NR_msync (__NR_SYSCALL_BASE+144)
-#define __NR_readv (__NR_SYSCALL_BASE+145)
-#define __NR_writev (__NR_SYSCALL_BASE+146)
-#define __NR_getsid (__NR_SYSCALL_BASE+147)
-#define __NR_fdatasync (__NR_SYSCALL_BASE+148)
-#define __NR__sysctl (__NR_SYSCALL_BASE+149)
-#define __NR_mlock (__NR_SYSCALL_BASE+150)
-#define __NR_munlock (__NR_SYSCALL_BASE+151)
-#define __NR_mlockall (__NR_SYSCALL_BASE+152)
-#define __NR_munlockall (__NR_SYSCALL_BASE+153)
-#define __NR_sched_setparam (__NR_SYSCALL_BASE+154)
-#define __NR_sched_getparam (__NR_SYSCALL_BASE+155)
-#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156)
-#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157)
-#define __NR_sched_yield (__NR_SYSCALL_BASE+158)
-#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159)
-#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160)
-#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161)
-#define __NR_nanosleep (__NR_SYSCALL_BASE+162)
-#define __NR_mremap (__NR_SYSCALL_BASE+163)
-#define __NR_setresuid (__NR_SYSCALL_BASE+164)
-#define __NR_getresuid (__NR_SYSCALL_BASE+165)
- /* 166 was sys_vm86 */
- /* 167 was sys_query_module */
-#define __NR_poll (__NR_SYSCALL_BASE+168)
-#define __NR_nfsservctl (__NR_SYSCALL_BASE+169)
-#define __NR_setresgid (__NR_SYSCALL_BASE+170)
-#define __NR_getresgid (__NR_SYSCALL_BASE+171)
-#define __NR_prctl (__NR_SYSCALL_BASE+172)
-#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173)
-#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174)
-#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175)
-#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176)
-#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177)
-#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178)
-#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179)
-#define __NR_pread64 (__NR_SYSCALL_BASE+180)
-#define __NR_pwrite64 (__NR_SYSCALL_BASE+181)
-#define __NR_chown (__NR_SYSCALL_BASE+182)
-#define __NR_getcwd (__NR_SYSCALL_BASE+183)
-#define __NR_capget (__NR_SYSCALL_BASE+184)
-#define __NR_capset (__NR_SYSCALL_BASE+185)
-#define __NR_sigaltstack (__NR_SYSCALL_BASE+186)
-#define __NR_sendfile (__NR_SYSCALL_BASE+187)
- /* 188 reserved */
- /* 189 reserved */
-#define __NR_vfork (__NR_SYSCALL_BASE+190)
-#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191) /* SuS compliant getrlimit */
-#define __NR_mmap2 (__NR_SYSCALL_BASE+192)
-#define __NR_truncate64 (__NR_SYSCALL_BASE+193)
-#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194)
-#define __NR_stat64 (__NR_SYSCALL_BASE+195)
-#define __NR_lstat64 (__NR_SYSCALL_BASE+196)
-#define __NR_fstat64 (__NR_SYSCALL_BASE+197)
-#define __NR_lchown32 (__NR_SYSCALL_BASE+198)
-#define __NR_getuid32 (__NR_SYSCALL_BASE+199)
-#define __NR_getgid32 (__NR_SYSCALL_BASE+200)
-#define __NR_geteuid32 (__NR_SYSCALL_BASE+201)
-#define __NR_getegid32 (__NR_SYSCALL_BASE+202)
-#define __NR_setreuid32 (__NR_SYSCALL_BASE+203)
-#define __NR_setregid32 (__NR_SYSCALL_BASE+204)
-#define __NR_getgroups32 (__NR_SYSCALL_BASE+205)
-#define __NR_setgroups32 (__NR_SYSCALL_BASE+206)
-#define __NR_fchown32 (__NR_SYSCALL_BASE+207)
-#define __NR_setresuid32 (__NR_SYSCALL_BASE+208)
-#define __NR_getresuid32 (__NR_SYSCALL_BASE+209)
-#define __NR_setresgid32 (__NR_SYSCALL_BASE+210)
-#define __NR_getresgid32 (__NR_SYSCALL_BASE+211)
-#define __NR_chown32 (__NR_SYSCALL_BASE+212)
-#define __NR_setuid32 (__NR_SYSCALL_BASE+213)
-#define __NR_setgid32 (__NR_SYSCALL_BASE+214)
-#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215)
-#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216)
-#define __NR_getdents64 (__NR_SYSCALL_BASE+217)
-#define __NR_pivot_root (__NR_SYSCALL_BASE+218)
-#define __NR_mincore (__NR_SYSCALL_BASE+219)
-#define __NR_madvise (__NR_SYSCALL_BASE+220)
-#define __NR_fcntl64 (__NR_SYSCALL_BASE+221)
- /* 222 for tux */
- /* 223 is unused */
-#define __NR_gettid (__NR_SYSCALL_BASE+224)
-#define __NR_readahead (__NR_SYSCALL_BASE+225)
-#define __NR_setxattr (__NR_SYSCALL_BASE+226)
-#define __NR_lsetxattr (__NR_SYSCALL_BASE+227)
-#define __NR_fsetxattr (__NR_SYSCALL_BASE+228)
-#define __NR_getxattr (__NR_SYSCALL_BASE+229)
-#define __NR_lgetxattr (__NR_SYSCALL_BASE+230)
-#define __NR_fgetxattr (__NR_SYSCALL_BASE+231)
-#define __NR_listxattr (__NR_SYSCALL_BASE+232)
-#define __NR_llistxattr (__NR_SYSCALL_BASE+233)
-#define __NR_flistxattr (__NR_SYSCALL_BASE+234)
-#define __NR_removexattr (__NR_SYSCALL_BASE+235)
-#define __NR_lremovexattr (__NR_SYSCALL_BASE+236)
-#define __NR_fremovexattr (__NR_SYSCALL_BASE+237)
-#define __NR_tkill (__NR_SYSCALL_BASE+238)
-#define __NR_sendfile64 (__NR_SYSCALL_BASE+239)
-#define __NR_futex (__NR_SYSCALL_BASE+240)
-#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241)
-#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242)
-#define __NR_io_setup (__NR_SYSCALL_BASE+243)
-#define __NR_io_destroy (__NR_SYSCALL_BASE+244)
-#define __NR_io_getevents (__NR_SYSCALL_BASE+245)
-#define __NR_io_submit (__NR_SYSCALL_BASE+246)
-#define __NR_io_cancel (__NR_SYSCALL_BASE+247)
-#define __NR_exit_group (__NR_SYSCALL_BASE+248)
-#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249)
-#define __NR_epoll_create (__NR_SYSCALL_BASE+250)
-#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251)
-#define __NR_epoll_wait (__NR_SYSCALL_BASE+252)
-#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253)
- /* 254 for set_thread_area */
- /* 255 for get_thread_area */
-#define __NR_set_tid_address (__NR_SYSCALL_BASE+256)
-#define __NR_timer_create (__NR_SYSCALL_BASE+257)
-#define __NR_timer_settime (__NR_SYSCALL_BASE+258)
-#define __NR_timer_gettime (__NR_SYSCALL_BASE+259)
-#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260)
-#define __NR_timer_delete (__NR_SYSCALL_BASE+261)
-#define __NR_clock_settime (__NR_SYSCALL_BASE+262)
-#define __NR_clock_gettime (__NR_SYSCALL_BASE+263)
-#define __NR_clock_getres (__NR_SYSCALL_BASE+264)
-#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265)
-#define __NR_statfs64 (__NR_SYSCALL_BASE+266)
-#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267)
-#define __NR_tgkill (__NR_SYSCALL_BASE+268)
-#define __NR_utimes (__NR_SYSCALL_BASE+269)
-#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270)
-#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271)
-#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272)
-#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273)
-#define __NR_mq_open (__NR_SYSCALL_BASE+274)
-#define __NR_mq_unlink (__NR_SYSCALL_BASE+275)
-#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276)
-#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277)
-#define __NR_mq_notify (__NR_SYSCALL_BASE+278)
-#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279)
-#define __NR_waitid (__NR_SYSCALL_BASE+280)
-#define __NR_socket (__NR_SYSCALL_BASE+281)
-#define __NR_bind (__NR_SYSCALL_BASE+282)
-#define __NR_connect (__NR_SYSCALL_BASE+283)
-#define __NR_listen (__NR_SYSCALL_BASE+284)
-#define __NR_accept (__NR_SYSCALL_BASE+285)
-#define __NR_getsockname (__NR_SYSCALL_BASE+286)
-#define __NR_getpeername (__NR_SYSCALL_BASE+287)
-#define __NR_socketpair (__NR_SYSCALL_BASE+288)
-#define __NR_send (__NR_SYSCALL_BASE+289)
-#define __NR_sendto (__NR_SYSCALL_BASE+290)
-#define __NR_recv (__NR_SYSCALL_BASE+291)
-#define __NR_recvfrom (__NR_SYSCALL_BASE+292)
-#define __NR_shutdown (__NR_SYSCALL_BASE+293)
-#define __NR_setsockopt (__NR_SYSCALL_BASE+294)
-#define __NR_getsockopt (__NR_SYSCALL_BASE+295)
-#define __NR_sendmsg (__NR_SYSCALL_BASE+296)
-#define __NR_recvmsg (__NR_SYSCALL_BASE+297)
-#define __NR_semop (__NR_SYSCALL_BASE+298)
-#define __NR_semget (__NR_SYSCALL_BASE+299)
-#define __NR_semctl (__NR_SYSCALL_BASE+300)
-#define __NR_msgsnd (__NR_SYSCALL_BASE+301)
-#define __NR_msgrcv (__NR_SYSCALL_BASE+302)
-#define __NR_msgget (__NR_SYSCALL_BASE+303)
-#define __NR_msgctl (__NR_SYSCALL_BASE+304)
-#define __NR_shmat (__NR_SYSCALL_BASE+305)
-#define __NR_shmdt (__NR_SYSCALL_BASE+306)
-#define __NR_shmget (__NR_SYSCALL_BASE+307)
-#define __NR_shmctl (__NR_SYSCALL_BASE+308)
-#define __NR_add_key (__NR_SYSCALL_BASE+309)
-#define __NR_request_key (__NR_SYSCALL_BASE+310)
-#define __NR_keyctl (__NR_SYSCALL_BASE+311)
-#define __NR_semtimedop (__NR_SYSCALL_BASE+312)
-#define __NR_vserver (__NR_SYSCALL_BASE+313)
-#define __NR_ioprio_set (__NR_SYSCALL_BASE+314)
-#define __NR_ioprio_get (__NR_SYSCALL_BASE+315)
-#define __NR_inotify_init (__NR_SYSCALL_BASE+316)
-#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317)
-#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318)
-#define __NR_mbind (__NR_SYSCALL_BASE+319)
-#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320)
-#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321)
-#define __NR_openat (__NR_SYSCALL_BASE+322)
-#define __NR_mkdirat (__NR_SYSCALL_BASE+323)
-#define __NR_mknodat (__NR_SYSCALL_BASE+324)
-#define __NR_fchownat (__NR_SYSCALL_BASE+325)
-#define __NR_futimesat (__NR_SYSCALL_BASE+326)
-#define __NR_fstatat64 (__NR_SYSCALL_BASE+327)
-#define __NR_unlinkat (__NR_SYSCALL_BASE+328)
-#define __NR_renameat (__NR_SYSCALL_BASE+329)
-#define __NR_linkat (__NR_SYSCALL_BASE+330)
-#define __NR_symlinkat (__NR_SYSCALL_BASE+331)
-#define __NR_readlinkat (__NR_SYSCALL_BASE+332)
-#define __NR_fchmodat (__NR_SYSCALL_BASE+333)
-#define __NR_faccessat (__NR_SYSCALL_BASE+334)
-#define __NR_pselect6 (__NR_SYSCALL_BASE+335)
-#define __NR_ppoll (__NR_SYSCALL_BASE+336)
-#define __NR_unshare (__NR_SYSCALL_BASE+337)
-#define __NR_set_robust_list (__NR_SYSCALL_BASE+338)
-#define __NR_get_robust_list (__NR_SYSCALL_BASE+339)
-#define __NR_splice (__NR_SYSCALL_BASE+340)
-#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341)
+#include <asm/unistd-common.h>
#define __NR_sync_file_range2 __NR_arm_sync_file_range
-#define __NR_tee (__NR_SYSCALL_BASE+342)
-#define __NR_vmsplice (__NR_SYSCALL_BASE+343)
-#define __NR_move_pages (__NR_SYSCALL_BASE+344)
-#define __NR_getcpu (__NR_SYSCALL_BASE+345)
-#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346)
-#define __NR_kexec_load (__NR_SYSCALL_BASE+347)
-#define __NR_utimensat (__NR_SYSCALL_BASE+348)
-#define __NR_signalfd (__NR_SYSCALL_BASE+349)
-#define __NR_timerfd_create (__NR_SYSCALL_BASE+350)
-#define __NR_eventfd (__NR_SYSCALL_BASE+351)
-#define __NR_fallocate (__NR_SYSCALL_BASE+352)
-#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353)
-#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354)
-#define __NR_signalfd4 (__NR_SYSCALL_BASE+355)
-#define __NR_eventfd2 (__NR_SYSCALL_BASE+356)
-#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357)
-#define __NR_dup3 (__NR_SYSCALL_BASE+358)
-#define __NR_pipe2 (__NR_SYSCALL_BASE+359)
-#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360)
-#define __NR_preadv (__NR_SYSCALL_BASE+361)
-#define __NR_pwritev (__NR_SYSCALL_BASE+362)
-#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363)
-#define __NR_perf_event_open (__NR_SYSCALL_BASE+364)
-#define __NR_recvmmsg (__NR_SYSCALL_BASE+365)
-#define __NR_accept4 (__NR_SYSCALL_BASE+366)
-#define __NR_fanotify_init (__NR_SYSCALL_BASE+367)
-#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368)
-#define __NR_prlimit64 (__NR_SYSCALL_BASE+369)
-#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370)
-#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371)
-#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372)
-#define __NR_syncfs (__NR_SYSCALL_BASE+373)
-#define __NR_sendmmsg (__NR_SYSCALL_BASE+374)
-#define __NR_setns (__NR_SYSCALL_BASE+375)
-#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376)
-#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377)
-#define __NR_kcmp (__NR_SYSCALL_BASE+378)
-#define __NR_finit_module (__NR_SYSCALL_BASE+379)
-#define __NR_sched_setattr (__NR_SYSCALL_BASE+380)
-#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
-#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
-#define __NR_seccomp (__NR_SYSCALL_BASE+383)
-#define __NR_getrandom (__NR_SYSCALL_BASE+384)
-#define __NR_memfd_create (__NR_SYSCALL_BASE+385)
-#define __NR_bpf (__NR_SYSCALL_BASE+386)
-#define __NR_execveat (__NR_SYSCALL_BASE+387)
-#define __NR_userfaultfd (__NR_SYSCALL_BASE+388)
-#define __NR_membarrier (__NR_SYSCALL_BASE+389)
-#define __NR_mlock2 (__NR_SYSCALL_BASE+390)
-#define __NR_copy_file_range (__NR_SYSCALL_BASE+391)
-#define __NR_preadv2 (__NR_SYSCALL_BASE+392)
-#define __NR_pwritev2 (__NR_SYSCALL_BASE+393)
/*
* The following SWIs are ARM private.
@@ -431,22 +36,4 @@
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
-/*
- * The following syscalls are obsolete and no longer available for EABI.
- */
-#if defined(__ARM_EABI__)
-#undef __NR_time
-#undef __NR_umount
-#undef __NR_stime
-#undef __NR_alarm
-#undef __NR_utime
-#undef __NR_getrlimit
-#undef __NR_select
-#undef __NR_readdir
-#undef __NR_mmap
-#undef __NR_socketcall
-#undef __NR_syscall
-#undef __NR_ipc
-#endif
-
#endif /* __ASM_ARM_UNISTD_H */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a276..651ec30 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* Device Control API on vcpu fd */
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index c93cf35..4edbe4b 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+ __u64 flags;
+ __u64 process_table; /* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+ struct kvm_ppc_radix_geom {
+ __u8 page_shift;
+ __u8 level_bits[4];
+ __u8 pad[3];
+ } geometries[8];
+ __u32 ap_encodings[8];
+};
+
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
@@ -573,6 +593,10 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
@@ -596,6 +620,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
@@ -608,5 +633,7 @@ struct kvm_get_htab_header {
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
+#define KVM_XICS_PRESENTED (1ULL << 43)
+#define KVM_XICS_QUEUED (1ULL << 44)
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h
index 1e66eba..598043c 100644
--- a/linux-headers/asm-powerpc/unistd.h
+++ b/linux-headers/asm-powerpc/unistd.h
@@ -392,5 +392,6 @@
#define __NR_copy_file_range 379
#define __NR_preadv2 380
#define __NR_pwritev2 381
+#define __NR_kexec_file_load 382
#endif /* _ASM_POWERPC_UNISTD_H_ */
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
index e41c5c1..3a53979 100644
--- a/linux-headers/asm-x86/kvm_para.h
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -45,7 +45,18 @@ struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
- __u32 pad[12];
+ __u8 preempted;
+ __u8 u8_pad[3];
+ __u32 pad[11];
+};
+
+#define KVM_CLOCK_PAIRING_WALLCLOCK 0
+struct kvm_clock_pairing {
+ __s64 sec;
+ __s64 nsec;
+ __u64 tsc;
+ __u32 flags;
+ __u32 pad[9];
};
#define KVM_STEAL_ALIGNMENT_BITS 5
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bb0ed71..4e082a8 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -218,7 +218,8 @@ struct kvm_hyperv_exit {
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 padding1[7];
+ __u8 immediate_exit;
+ __u8 padding1[6];
/* out */
__u32 exit_reason;
@@ -651,6 +652,9 @@ struct kvm_enable_cap {
};
/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
struct kvm_ppc_pvinfo {
/* out */
__u32 flags;
@@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info {
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
};
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+};
#define KVMIO 0xAE
@@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_USER_INSTR0 130
#define KVM_CAP_MSI_DEVID 131
#define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
+#define KVM_CAP_PPC_MMU_RADIX 134
+#define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping {
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
+/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
+/* Available with KVM_CAP_PPC_RADIX_MMU */
+#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index e61661e..15b24ff 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -14,6 +14,7 @@
#define KVM_EFAULT EFAULT
#define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM
+#define KVM_EOPNOTSUPP 95
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_MMU_OP 2
@@ -23,6 +24,7 @@
#define KVM_HC_MIPS_GET_CLOCK_FREQ 6
#define KVM_HC_MIPS_EXIT_VM 7
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
+#define KVM_HC_CLOCK_PAIRING 9
/*
* hypercalls use architecture specific
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 19e8453..2ed5dc3 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -11,13 +11,18 @@
#include <linux/types.h>
-#define UFFD_API ((__u64)0xAA)
/*
- * After implementing the respective features it will become:
- * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
- * UFFD_FEATURE_EVENT_FORK)
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
*/
-#define UFFD_API_FEATURES (0)
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_MADVDONTNEED | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -26,6 +31,9 @@
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -72,6 +80,21 @@ struct uffd_msg {
} pagefault;
struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } madv_dn;
+
+ struct {
/* unused reserved fields */
__u64 reserved1;
__u64 reserved2;
@@ -84,9 +107,9 @@ struct uffd_msg {
* Start at 0x12 and not at 0 to be more strict against bugs.
*/
#define UFFD_EVENT_PAGEFAULT 0x12
-#if 0 /* not available yet */
#define UFFD_EVENT_FORK 0x13
-#endif
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_MADVDONTNEED 0x15
/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
@@ -104,11 +127,37 @@ struct uffdio_api {
* Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
* are to be considered implicitly always enabled in all kernels as
* long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
*/
-#if 0 /* not available yet */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
-#endif
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
__u64 features;
__u64 ioctls;
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 759b850..531cb2e 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -203,6 +203,16 @@ struct vfio_device_info {
};
#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
+/*
+ * Vendor driver using Mediated device framework should provide device_api
+ * attribute in supported type attribute groups. Device API string should be one
+ * of the following corresponding to device flags in vfio_device_info structure.
+ */
+
+#define VFIO_DEVICE_API_PCI_STRING "vfio-pci"
+#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform"
+#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba"
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
diff --git a/migration/block.c b/migration/block.c
index ebc10e6..1941bc2 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -379,7 +379,7 @@ static void unset_dirty_tracking(void)
}
}
-static void init_blk_migration(QEMUFile *f)
+static int init_blk_migration(QEMUFile *f)
{
BlockDriverState *bs;
BlkMigDevState *bmds;
@@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f)
BlkMigDevState *bmds;
BlockDriverState *bs;
} *bmds_bs;
+ Error *local_err = NULL;
+ int ret;
block_mig_state.submitted = 0;
block_mig_state.read_done = 0;
@@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f)
sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
+ ret = sectors;
goto out;
}
bmds = g_new0(BlkMigDevState, 1);
- bmds->blk = blk_new();
+ bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
bmds->bulk_completed = 0;
bmds->total_sectors = sectors;
@@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f)
BlockDriverState *bs = bmds_bs[i].bs;
if (bmds) {
- blk_insert_bs(bmds->blk, bs);
+ ret = blk_insert_bs(bmds->blk, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto out;
+ }
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
@@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f)
}
}
+ ret = 0;
out:
g_free(bmds_bs);
+ return ret;
}
/* Called with no lock taken. */
@@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque)
block_mig_state.submitted, block_mig_state.transferred);
qemu_mutex_lock_iothread();
- init_blk_migration(f);
+ ret = init_blk_migration(f);
+ if (ret < 0) {
+ qemu_mutex_unlock_iothread();
+ return ret;
+ }
/* start track dirty blocks */
ret = set_dirty_tracking();
diff --git a/migration/colo.c b/migration/colo.c
index 712308e..c19eb3f 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -19,6 +19,8 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "migration/failover.h"
+#include "replication.h"
+#include "qmp-commands.h"
static bool vmstate_loading;
@@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s)
}
}
+void qmp_xen_set_replication(bool enable, bool primary,
+ bool has_failover, bool failover,
+ Error **errp)
+{
+ ReplicationMode mode = primary ?
+ REPLICATION_MODE_PRIMARY :
+ REPLICATION_MODE_SECONDARY;
+
+ if (has_failover && enable) {
+ error_setg(errp, "Parameter 'failover' is only for"
+ " stopping replication");
+ return;
+ }
+
+ if (enable) {
+ replication_start_all(mode, errp);
+ } else {
+ if (!has_failover) {
+ failover = NULL;
+ }
+ replication_stop_all(failover, failover ? NULL : errp);
+ }
+}
+
+ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
+{
+ Error *err = NULL;
+ ReplicationStatus *s = g_new0(ReplicationStatus, 1);
+
+ replication_get_error_all(&err);
+ if (err) {
+ s->error = true;
+ s->has_desc = true;
+ s->desc = g_strdup(error_get_pretty(err));
+ } else {
+ s->error = false;
+ }
+
+ error_free(err);
+ return s;
+}
+
+void qmp_xen_colo_do_checkpoint(Error **errp)
+{
+ replication_do_checkpoint_all(errp);
+}
+
static void colo_send_message(QEMUFile *f, COLOMessage msg,
Error **errp)
{
diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d..3dab684 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,10 @@
* for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300
+/* Maximum migrate downtime set to 2000 seconds */
+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
+
/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
@@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis->from_src_file = f;
+ mis->largest_page_size = qemu_ram_pagesize_largest();
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
@@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
return;
}
if (params->has_downtime_limit &&
- (params->downtime_limit < 0 || params->downtime_limit > 2000000)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "downtime_limit",
- "an integer in the range of 0 to 2000000 milliseconds");
+ (params->downtime_limit < 0 ||
+ params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d milliseconds",
+ MAX_MIGRATE_DOWNTIME);
return;
}
if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
@@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason)
migration_blockers = g_slist_remove(migration_blockers, reason);
}
+int check_migratable(Object *obj, Error **err)
+{
+ DeviceClass *dc = DEVICE_GET_CLASS(obj);
+ if (only_migratable && dc->vmsd) {
+ if (dc->vmsd->unmigratable) {
+ error_setg(err, "Device %s is not migratable, but "
+ "--only-migratable was specified",
+ object_get_typename(obj));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
void qmp_migrate_incoming(const char *uri, Error **errp)
{
Error *local_err = NULL;
@@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
void qmp_migrate_set_downtime(double value, Error **errp)
{
+ if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d seconds",
+ MAX_MIGRATE_DOWNTIME_SECONDS);
+ return;
+ }
+
value *= 1000; /* Convert to milliseconds */
value = MAX(0, MIN(INT64_MAX, value));
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddb..effbeb6 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd)
return false;
}
- return true;
-}
-
-/*
- * Check for things that postcopy won't support; returns 0 if the block
- * is fine.
- */
-static int check_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
-{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
-
- if (qemu_ram_pagesize(rb) > getpagesize()) {
- error_report("Postcopy doesn't support large page sizes yet (%s)",
- block_name);
- return -E2BIG;
+ if (getpagesize() != ram_pagesize_summary()) {
+ bool have_hp = false;
+ /* We've got a huge page */
+#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
+ have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+#endif
+ if (!have_hp) {
+ error_report("Userfault on this host does not support huge pages");
+ return false;
+ }
}
-
- return 0;
+ return true;
}
/*
@@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void)
goto out;
}
- /* Check for anything about the RAMBlocks we don't support */
- if (qemu_ram_foreach_block(check_range, NULL)) {
- /* check_range will have printed its own error */
- goto out;
- }
-
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
if (ufd == -1) {
error_report("%s: userfaultfd not available: %s", __func__,
@@ -200,27 +187,6 @@ out:
return ret;
}
-/**
- * postcopy_ram_discard_range: Discard a range of memory.
- * We can assume that if we've been called postcopy_ram_hosttest returned true.
- *
- * @mis: Current incoming migration state.
- * @start, @length: range of memory to discard.
- *
- * returns: 0 on success.
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- trace_postcopy_ram_discard_range(start, length);
- if (madvise(start, length, MADV_DONTNEED)) {
- error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
/*
* Setup an area of RAM so that it *can* be used for postcopy later; this
* must be done right at the start prior to pre-copy.
@@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr,
* - we're going to get the copy from the source anyway.
* (Precopy will just overwrite this data, so doesn't need the discard)
*/
- if (postcopy_ram_discard_range(mis, host_addr, length)) {
+ if (ram_discard_range(mis, block_name, 0, length)) {
return -1;
}
@@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
if (mis->postcopy_tmp_page) {
- munmap(mis->postcopy_tmp_page, getpagesize());
+ munmap(mis->postcopy_tmp_page, mis->largest_page_size);
mis->postcopy_tmp_page = NULL;
}
+ if (mis->postcopy_tmp_zero_page) {
+ munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+ mis->postcopy_tmp_zero_page = NULL;
+ }
trace_postcopy_ram_incoming_cleanup_exit();
return 0;
}
@@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
+ if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
+ error_report("%s userfault: Region doesn't support COPY", __func__);
+ return -1;
+ }
return 0;
}
@@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
- size_t hostpagesize = getpagesize();
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
@@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
break;
}
- rb_offset &= ~(hostpagesize - 1);
+ rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
@@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
}
}
trace_postcopy_ram_fault_thread_exit();
@@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
* Place a host page (from) at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
struct uffdio_copy copy_struct;
copy_struct.dst = (uint64_t)(uintptr_t)host;
copy_struct.src = (uint64_t)(uintptr_t)from;
- copy_struct.len = getpagesize();
+ copy_struct.len = pagesize;
copy_struct.mode = 0;
/* copy also acks to the kernel waking the stalled thread up
@@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
*/
if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
int e = errno;
- error_report("%s: %s copy host: %p from: %p",
- __func__, strerror(e), host, from);
+ error_report("%s: %s copy host: %p from: %p (size: %zd)",
+ __func__, strerror(e), host, from, pagesize);
return -e;
}
@@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
- struct uffdio_zeropage zero_struct;
+ trace_postcopy_place_page_zero(host);
- zero_struct.range.start = (uint64_t)(uintptr_t)host;
- zero_struct.range.len = getpagesize();
- zero_struct.mode = 0;
+ if (pagesize == getpagesize()) {
+ struct uffdio_zeropage zero_struct;
+ zero_struct.range.start = (uint64_t)(uintptr_t)host;
+ zero_struct.range.len = getpagesize();
+ zero_struct.mode = 0;
- if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
- int e = errno;
- error_report("%s: %s zero host: %p",
- __func__, strerror(e), host);
+ if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+ int e = errno;
+ error_report("%s: %s zero host: %p",
+ __func__, strerror(e), host);
- return -e;
+ return -e;
+ }
+ } else {
+ /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+ if (!mis->postcopy_tmp_zero_page) {
+ mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+ int e = errno;
+ mis->postcopy_tmp_zero_page = NULL;
+ error_report("%s: %s mapping large zero page",
+ __func__, strerror(e));
+ return -e;
+ }
+ memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+ }
+ return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+ pagesize);
}
- trace_postcopy_place_page_zero(host);
return 0;
}
@@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
if (!mis->postcopy_tmp_page) {
- mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+ mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
if (mis->postcopy_tmp_page == MAP_FAILED) {
@@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
return -1;
}
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- assert(0);
- return -1;
-}
-
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
assert(0);
@@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
return -1;
}
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
assert(0);
return -1;
}
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
assert(0);
return -1;
diff --git a/migration/ram.c b/migration/ram.c
index f289fcd..719425b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void)
iterations_prev = 0;
}
+/* Returns a summary bitmap of the page sizes of all RAMBlocks;
+ * for VMs with just normal pages this is equivalent to the
+ * host page size. If it's got some huge pages then it's the OR
+ * of all the different page sizes.
+ */
+uint64_t ram_pagesize_summary(void)
+{
+ RAMBlock *block;
+ uint64_t summary = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ summary |= block->page_size;
+ }
+
+ return summary;
+}
+
static void migration_bitmap_sync(void)
{
RAMBlock *block;
@@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
* offset to point into the middle of a host page
* in which case the remainder of the hostpage is sent.
* Only dirty target pages are sent.
+ * Note that the host page size may be a huge page for this
+ * block.
*
* Returns: Number of pages written.
*
@@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
ram_addr_t dirty_ram_abs)
{
int tmppages, pages = 0;
+ size_t pagesize = qemu_ram_pagesize(pss->block);
+
do {
tmppages = ram_save_target_page(ms, f, pss, last_stage,
bytes_transferred, dirty_ram_abs);
@@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
pages += tmppages;
pss->offset += TARGET_PAGE_SIZE;
dirty_ram_abs += TARGET_PAGE_SIZE;
- } while (pss->offset & (qemu_host_page_size - 1));
+ } while (pss->offset & (pagesize - 1));
/* The offset we leave with is the last one we looked at */
pss->offset -= TARGET_PAGE_SIZE;
@@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
{
unsigned long *bitmap;
unsigned long *unsentmap;
- unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
+ unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
unsigned long first = block->offset >> TARGET_PAGE_BITS;
unsigned long len = block->used_length >> TARGET_PAGE_BITS;
unsigned long last = first + (len - 1);
unsigned long run_start;
+ if (block->page_size == TARGET_PAGE_SIZE) {
+ /* Easy case - TPS==HPS for a non-huge page RAMBlock */
+ return;
+ }
+
bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
@@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
* Utility for the outgoing postcopy code.
*
* Discard any partially sent host-page size chunks, mark any partially
- * dirty host-page size chunks as all dirty.
+ * dirty host-page size chunks as all dirty. In this case the host-page
+ * is the host-page for the particular RAMBlock, i.e. it might be a huge page
*
* Returns: 0 on success
*/
@@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
{
struct RAMBlock *block;
- if (qemu_host_page_size == TARGET_PAGE_SIZE) {
- /* Easy case - TPS==HPS - nothing to be done */
- return 0;
- }
-
/* Easiest way to make sure we don't resume in the middle of a host-page */
last_seen_block = NULL;
last_sent_block = NULL;
@@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
return -EINVAL;
}
- /* Deal with TPS != HPS */
+ /* Deal with TPS != HPS and huge pages */
ret = postcopy_chunk_hostpages(ms);
if (ret) {
rcu_read_unlock();
@@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis,
{
int ret = -1;
+ trace_ram_discard_range(block_name, start, length);
+
rcu_read_lock();
RAMBlock *rb = qemu_ram_block_by_name(block_name);
@@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis,
goto err;
}
- uint8_t *host_startaddr = rb->host + start;
-
- if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned start address: %p",
- host_startaddr);
- goto err;
- }
-
- if ((start + length) <= rb->used_length) {
- uint8_t *host_endaddr = host_startaddr + length;
- if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned end address: %p",
- host_endaddr);
- goto err;
- }
- ret = postcopy_ram_discard_range(mis, host_startaddr, length);
- } else {
- error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
- "/%zx/" RAM_ADDR_FMT")",
- block_name, start, length, rb->used_length);
- }
+ ret = ram_block_discard_range(rb, start, length);
err:
rcu_read_unlock();
@@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
qemu_put_byte(f, strlen(block->idstr));
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
+ if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
+ qemu_put_be64(f, block->page_size);
+ }
}
rcu_read_unlock();
@@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f)
{
int flags = 0, ret = 0;
bool place_needed = false;
- bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
+ bool matching_page_sizes = false;
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
void *postcopy_host_page = postcopy_get_tmp_page(mis);
@@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f)
void *host = NULL;
void *page_buffer = NULL;
void *place_source = NULL;
+ RAMBlock *block = NULL;
uint8_t ch;
addr = qemu_get_be64(f);
@@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f)
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
place_needed = false;
if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
- RAMBlock *block = ram_block_from_stream(f, flags);
+ block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
if (!host) {
@@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f)
ret = -EINVAL;
break;
}
+ matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
/*
- * Postcopy requires that we place whole host pages atomically.
+ * Postcopy requires that we place whole host pages atomically;
+ * these may be huge pages for RAMBlocks that are backed by
+ * hugetlbfs.
* To make it atomic, the data is read into a temporary page
* that's moved into place later.
* The migration protocol uses, possibly smaller, target-pages
@@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f)
* of a host page in order.
*/
page_buffer = postcopy_host_page +
- ((uintptr_t)host & ~qemu_host_page_mask);
+ ((uintptr_t)host & (block->page_size - 1));
/* If all TP are zero then we can optimise the place */
- if (!((uintptr_t)host & ~qemu_host_page_mask)) {
+ if (!((uintptr_t)host & (block->page_size - 1))) {
all_zero = true;
} else {
/* not the 1st TP within the HP */
@@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f)
* page
*/
place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
- ~qemu_host_page_mask) == 0;
+ (block->page_size - 1)) == 0;
place_source = postcopy_host_page;
}
last_host = host;
@@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f)
if (place_needed) {
/* This gets called at the last target page in the host page */
+ void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+
if (all_zero) {
- ret = postcopy_place_page_zero(mis,
- host + TARGET_PAGE_SIZE -
- qemu_host_page_size);
+ ret = postcopy_place_page_zero(mis, place_dest,
+ block->page_size);
} else {
- ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
- qemu_host_page_size,
- place_source);
+ ret = postcopy_place_page(mis, place_dest,
+ place_source, block->page_size);
}
}
if (!ret) {
@@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
* be atomic
*/
bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
+ /* ADVISE is earlier, it shows the source has the postcopy capability on */
+ bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
seq_iter++;
@@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
error_report_err(local_err);
}
}
+ /* For postcopy we need to check hugepage sizes match */
+ if (postcopy_advised &&
+ block->page_size != qemu_host_page_size) {
+ uint64_t remote_page_size = qemu_get_be64(f);
+ if (remote_page_size != block->page_size) {
+ error_report("Mismatched RAM page size %s "
+ "(local) %zd != %" PRId64,
+ id, block->page_size,
+ remote_page_size);
+ ret = -EINVAL;
+ }
+ }
ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
block->idstr);
} else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 5ecd264..3b19a4a 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
return -1;
}
+ g_free(id);
se->compat = g_new0(CompatEntry, 1);
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
@@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
uint64_t tmp[2];
- tmp[0] = cpu_to_be64(getpagesize());
+ tmp[0] = cpu_to_be64(ram_pagesize_summary());
tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
trace_qemu_savevm_send_postcopy_advise();
@@ -1276,6 +1277,11 @@ done:
status = MIGRATION_STATUS_COMPLETED;
}
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
+
+ /* f is outer parameter, it should not stay in global migration state after
+ * this function finished */
+ ms->to_dst_file = NULL;
+
return ret;
}
@@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
- uint64_t remote_hps, remote_tps;
+ uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
trace_loadvm_postcopy_handle_advise();
if (ps != POSTCOPY_INCOMING_NONE) {
@@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
return -1;
}
- remote_hps = qemu_get_be64(mis->from_src_file);
- if (remote_hps != getpagesize()) {
+ remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
+ local_pagesize_summary = ram_pagesize_summary();
+
+ if (remote_pagesize_summary != local_pagesize_summary) {
/*
- * Some combinations of mismatch are probably possible but it gets
- * a bit more complicated. In particular we need to place whole
- * host pages on the dest at once, and we need to ensure that we
- * handle dirtying to make sure we never end up sending part of
- * a hostpage on it's own.
+ * This detects two potential causes of mismatch:
+ * a) A mismatch in host page sizes
+ * Some combinations of mismatch are probably possible but it gets
+ * a bit more complicated. In particular we need to place whole
+ * host pages on the dest at once, and we need to ensure that we
+ * handle dirtying to make sure we never end up sending part of
+ * a hostpage on it's own.
+ * b) The use of different huge page sizes on source/destination
+ * a more fine grain test is performed during RAM block migration
+ * but this test here causes a nice early clear failure, and
+ * also fails when passed to an older qemu that doesn't
+ * do huge pages.
*/
- error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
- (int)remote_hps, getpagesize());
+ error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
+ " d=%" PRIx64 ")",
+ remote_pagesize_summary, local_pagesize_summary);
return -1;
}
diff --git a/migration/trace-events b/migration/trace-events
index fa660e3..7372ce2 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
migration_throttle(void) ""
+ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
ram_postcopy_send_discard_bitmap(void) ""
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
@@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
# migration/postcopy-ram.c
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
-postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
diff --git a/migration/vmstate.c b/migration/vmstate.c
index b4d8ae9..78b3cd4 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field)
return size;
}
-static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque)
{
- void *base_addr = opaque + field->offset;
-
- if (field->flags & VMS_POINTER) {
- if (alloc && (field->flags & VMS_ALLOC)) {
- gsize size = 0;
- if (field->flags & VMS_VBUFFER) {
- size = vmstate_size(opaque, field);
- } else {
- int n_elems = vmstate_n_elems(opaque, field);
- if (n_elems) {
- size = n_elems * field->size;
- }
- }
- if (size) {
- *(void **)base_addr = g_malloc(size);
- }
+ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
+ gsize size = vmstate_size(opaque, field);
+ size *= vmstate_n_elems(opaque, field);
+ if (size) {
+ *(void **)ptr = g_malloc(size);
}
- base_addr = *(void **)base_addr;
}
-
- return base_addr;
}
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
@@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, true);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
+ vmstate_handle_alloc(first_elem, field, opaque);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- ret = vmstate_load_state(f, field->vmsd, addr,
+ if (!curr_elem) {
+ /* if null pointer check placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ ret = vmstate_load_state(f, field->vmsd, curr_elem,
field->vmsd->version_id);
} else {
- ret = field->info->get(f, addr, size, field);
+ ret = field->info->get(f, curr_elem, size, field);
}
if (ret >= 0) {
ret = qemu_file_get_error(f);
@@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
while (field->name) {
if (!field->field_exists ||
field->field_exists(opaque, vmsd->version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, false);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
int64_t old_offset, written_bytes;
QJSON *vmdesc_loop = vmdesc;
trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
old_offset = qemu_ftell_fast(f);
-
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ assert(curr_elem);
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- vmstate_save_state(f, field->vmsd, addr, vmdesc_loop);
+ if (!curr_elem) {
+ /* if null pointer write placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop);
} else {
- field->info->put(f, addr, size, field, vmdesc_loop);
+ field->info->put(f, curr_elem, size, field, vmdesc_loop);
}
written_bytes = qemu_ftell_fast(f) - old_offset;
@@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = {
.put = put_uint64,
};
+static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field)
+
+{
+ if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) {
+ return 0;
+ }
+ error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER");
+ return -EINVAL;
+}
+
+static int put_nullptr(QEMUFile *f, void *pv, size_t size,
+ VMStateField *field, QJSON *vmdesc)
+
+{
+ if (pv == NULL) {
+ qemu_put_byte(f, VMS_NULLPTR_MARKER);
+ return 0;
+ }
+ error_report("vmstate: put_nullptr must be called with pv == NULL");
+ return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_nullptr = {
+ .name = "uint64",
+ .get = get_nullptr,
+ .put = put_nullptr,
+};
+
/* 64 bit unsigned int. See that the received value is the same than the one
in the field */
diff --git a/monitor.c b/monitor.c
index f8f4a07..b68944d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void)
#ifndef TARGET_ARM
qmp_unregister_command("query-gic-capabilities");
#endif
-#if !defined(TARGET_S390X)
+#if !defined(TARGET_S390X) && !defined(TARGET_I386)
qmp_unregister_command("query-cpu-model-expansion");
+#endif
+#if !defined(TARGET_S390X)
qmp_unregister_command("query-cpu-model-baseline");
qmp_unregister_command("query-cpu-model-comparison");
#endif
diff --git a/nbd/server.c b/nbd/server.c
index ac92fa0..924a1fe 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
{
BlockBackend *blk;
NBDExport *exp = g_malloc0(sizeof(NBDExport));
+ uint64_t perm;
+ int ret;
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ /* Don't allow resize while the NBD server is running, otherwise we don't
+ * care what happens with the node. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
+ perm |= BLK_PERM_WRITE;
+ }
+ blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
blk_set_enable_write_cache(blk, !writethrough);
exp->refcount = 1;
diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin
index 229b5af..18666c9 100644
--- a/pc-bios/bios-256k.bin
+++ b/pc-bios/bios-256k.bin
Binary files differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index 9a9b0f0..a394411 100644
--- a/pc-bios/bios.bin
+++ b/pc-bios/bios.bin
Binary files differ
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 95f1167..4869c9d 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 675968e..aada55e 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index d4b9532..cf466f6 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index cf05bf0..2a4adfa 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differ
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 611102e..b21c877 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -724,11 +724,17 @@ static void zipl_load_vscsi(void)
void zipl_load(void)
{
- if (virtio_get_device()->is_cdrom) {
+ VDev *vdev = virtio_get_device();
+
+ if (vdev->is_cdrom) {
ipl_iso_el_torito();
panic("\n! Cannot IPL this ISO image !\n");
}
+ if (virtio_get_device_type() == VIRTIO_ID_NET) {
+ jump_to_IPL_code(vdev->netboot_start_addr);
+ }
+
ipl_scsi();
switch (virtio_get_device_type()) {
diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 86abc56..890aed9 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -13,7 +13,8 @@
#define IPLB_H
struct IplBlockCcw {
- uint8_t reserved0[85];
+ uint64_t netboot_start_addr;
+ uint8_t reserved0[77];
uint8_t ssid;
uint16_t devno;
uint8_t vm_flags;
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 345b848..0946766 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no)
if (!virtio_is_supported(blk_schid)) {
continue;
}
+ /* Skip net devices since no IPLB is created and therefore no
+ * no network bootloader has been loaded
+ */
+ if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) {
+ continue;
+ }
if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) {
return true;
}
@@ -67,6 +73,7 @@ static void virtio_setup(void)
int ssid;
bool found = false;
uint16_t dev_no;
+ VDev *vdev = virtio_get_device();
/*
* We unconditionally enable mss support. In every sane configuration,
@@ -85,9 +92,6 @@ static void virtio_setup(void)
found = find_dev(&schib, dev_no);
break;
case S390_IPL_TYPE_QEMU_SCSI:
- {
- VDev *vdev = virtio_get_device();
-
vdev->scsi_device_selected = true;
vdev->selected_scsi_device.channel = iplb.scsi.channel;
vdev->selected_scsi_device.target = iplb.scsi.target;
@@ -95,7 +99,6 @@ static void virtio_setup(void)
blk_schid.ssid = iplb.scsi.ssid & 0x3;
found = find_dev(&schib, iplb.scsi.devno);
break;
- }
default:
panic("List-directed IPL not supported yet!\n");
}
@@ -111,9 +114,14 @@ static void virtio_setup(void)
IPL_assert(found, "No virtio device found");
- virtio_setup_device(blk_schid);
+ if (virtio_get_device_type() == VIRTIO_ID_NET) {
+ sclp_print("Network boot device detected\n");
+ vdev->netboot_start_addr = iplb.ccw.netboot_start_addr;
+ } else {
+ virtio_setup_device(blk_schid);
- IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
+ IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
+ }
}
int main(void)
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index b333734..6ee93d5 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid)
switch (vdev.senseid.cu_model) {
case VIRTIO_ID_BLOCK:
case VIRTIO_ID_SCSI:
+ case VIRTIO_ID_NET:
return true;
}
}
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index eb35ea5..3388a42 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -276,6 +276,7 @@ struct VDev {
uint8_t scsi_dev_heads;
bool scsi_device_selected;
ScsiDevice selected_scsi_device;
+ uint64_t netboot_start_addr;
};
typedef struct VDev VDev;
diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin
index 9dadce2..e6c42bd 100644
--- a/pc-bios/vgabios-cirrus.bin
+++ b/pc-bios/vgabios-cirrus.bin
Binary files differ
diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin
index a89725c..915eba7 100644
--- a/pc-bios/vgabios-qxl.bin
+++ b/pc-bios/vgabios-qxl.bin
Binary files differ
diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin
index ea04141..40eca8c 100644
--- a/pc-bios/vgabios-stdvga.bin
+++ b/pc-bios/vgabios-stdvga.bin
Binary files differ
diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin
index 71e22fc..8b3abfa 100644
--- a/pc-bios/vgabios-virtio.bin
+++ b/pc-bios/vgabios-virtio.bin
Binary files differ
diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin
index ad239cb..6a90b94 100644
--- a/pc-bios/vgabios-vmware.bin
+++ b/pc-bios/vgabios-vmware.bin
Binary files differ
diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin
index 9947c2c..d3ed89d 100644
--- a/pc-bios/vgabios.bin
+++ b/pc-bios/vgabios.bin
Binary files differ
diff --git a/qapi-schema.json b/qapi-schema.json
index 150ee98..5eb1b8b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4274,6 +4274,15 @@
# migration-safe, but allows tooling to get an insight and work with
# model details.
#
+# Note: When a non-migration-safe CPU model is expanded in static mode, some
+# features enabled by the CPU model may be omitted, because they can't be
+# implemented by a static CPU model definition (e.g. cache info passthrough and
+# PMU passthrough in x86). If you need an accurate representation of the
+# features enabled by a non-migration-safe CPU model, use @full. If you need a
+# static representation that will keep ABI compatibility even when changing QEMU
+# version or machine-type, use @static (but keep in mind that some features may
+# be omitted).
+#
# Since: 2.8.0
##
{ 'enum': 'CpuModelExpansionType',
@@ -5990,6 +5999,79 @@
{ 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} }
##
+# @xen-set-replication:
+#
+# Enable or disable replication.
+#
+# @enable: true to enable, false to disable.
+#
+# @primary: true for primary or false for secondary.
+#
+# @failover: #optional true to do failover, false to stop. but cannot be
+# specified if 'enable' is true. default value is false.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-set-replication",
+# "arguments": {"enable": true, "primary": false} }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-set-replication',
+ 'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } }
+
+##
+# @ReplicationStatus:
+#
+# The result format for 'query-xen-replication-status'.
+#
+# @error: true if an error happened, false if replication is normal.
+#
+# @desc: #optional the human readable error description string, when
+# @error is 'true'.
+#
+# Since: 2.9
+##
+{ 'struct': 'ReplicationStatus',
+ 'data': { 'error': 'bool', '*desc': 'str' } }
+
+##
+# @query-xen-replication-status:
+#
+# Query replication status while the vm is running.
+#
+# Returns: A @ReplicationResult object showing the status.
+#
+# Example:
+#
+# -> { "execute": "query-xen-replication-status" }
+# <- { "return": { "error": false } }
+#
+# Since: 2.9
+##
+{ 'command': 'query-xen-replication-status',
+ 'returns': 'ReplicationStatus' }
+
+##
+# @xen-colo-do-checkpoint:
+#
+# Xen uses this command to notify replication to trigger a checkpoint.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-colo-do-checkpoint" }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-colo-do-checkpoint' }
+
+##
# @GICCapability:
#
# The struct describes capability for a specific GIC (Generic
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cf24c04..5cc992f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1304,6 +1304,11 @@
#
# @speed: #optional the maximum speed, in bytes per second
#
+# @filter-node-name: #optional the node name that should be assigned to the
+# filter driver that the commit job inserts into the graph
+# above @top. If this option is not given, a node name is
+# autogenerated. (Since: 2.9)
+#
# Returns: Nothing on success
# If commit or stream is already active on this device, DeviceInUse
# If @device does not exist, DeviceNotFound
@@ -1323,7 +1328,8 @@
##
{ 'command': 'block-commit',
'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
- '*backing-file': 'str', '*speed': 'int' } }
+ '*backing-file': 'str', '*speed': 'int',
+ '*filter-node-name': 'str' } }
##
# @drive-backup:
@@ -1671,6 +1677,11 @@
# default 'report' (no limitations, since this applies to
# a different block device than @device).
#
+# @filter-node-name: #optional the node name that should be assigned to the
+# filter driver that the mirror job inserts into the graph
+# above @device. If this option is not given, a node name is
+# autogenerated. (Since: 2.9)
+#
# Returns: nothing on success.
#
# Since: 2.6
@@ -1690,7 +1701,8 @@
'sync': 'MirrorSyncMode',
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
- '*on-target-error': 'BlockdevOnError' } }
+ '*on-target-error': 'BlockdevOnError',
+ '*filter-node-name': 'str' } }
##
# @block_set_io_throttle:
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 549f45f..5f2fcdf 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -29,7 +29,6 @@
#include "qemu/error-report.h"
#include "qemu/help_option.h"
#include "sysemu/block-backend.h"
-#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
@@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
return NULL;
}
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_setg(errp, "Device %s is not migratable, but "
- "--only-migratable was specified", driver);
- return NULL;
- }
- }
-
/* find bus */
path = qemu_opt_get(opts, "bus");
if (path != NULL) {
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index f054599..9c9702c 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -40,9 +40,9 @@ STEXI
ETEXI
DEF("convert", img_convert,
- "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
+ "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("dd", img_dd,
diff --git a/qemu-img.c b/qemu-img.c
index df3aefd..98b836b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void)
" kinds of errors, with a higher risk of choosing the wrong fix or\n"
" hiding corruption that has already occurred.\n"
"\n"
+ "Parameters to convert subcommand:\n"
+ " '-m' specifies how many coroutines work in parallel during the convert\n"
+ " process (defaults to 8)\n"
+ " '-W' allow to write to the target out of order rather than sequential\n"
+ "\n"
"Parameters to snapshot subcommand:\n"
" 'snapshot' is the name of the snapshot to create, apply or delete\n"
" '-a' applies a snapshot (revert disk to saved state)\n"
@@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp)
{
AioContext *aio_context = blk_get_aio_context(job->blk);
+ /* FIXME In error cases, the job simply goes away and we access a dangling
+ * pointer below. */
aio_context_acquire(aio_context);
do {
aio_poll(aio_context, true);
@@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv)
const char *filename, *fmt, *cache, *base;
BlockBackend *blk;
BlockDriverState *bs, *base_bs;
+ BlockJob *job;
bool progress = false, quiet = false, drop = false;
bool writethrough;
Error *local_err = NULL;
@@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv)
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
- BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
- &local_err, false);
+ BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
+ &cbi, &local_err, false);
aio_context_release(aio_context);
if (local_err) {
goto done;
@@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv)
bdrv_ref(bs);
}
- run_block_job(bs->job, &local_err);
+ job = block_job_get("commit");
+ run_block_job(job, &local_err);
if (local_err) {
goto unref_backing;
}
@@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus {
BLK_BACKING_FILE,
};
+#define MAX_COROUTINES 16
+
typedef struct ImgConvertState {
BlockBackend **src;
int64_t *src_sectors;
- int src_cur, src_num;
- int64_t src_cur_offset;
+ int src_num;
int64_t total_sectors;
int64_t allocated_sectors;
+ int64_t allocated_done;
+ int64_t sector_num;
+ int64_t wr_offs;
enum ImgConvertBlockStatus status;
int64_t sector_next_status;
BlockBackend *target;
bool has_zero_init;
bool compressed;
bool target_has_backing;
+ bool wr_in_order;
int min_sparse;
size_t cluster_sectors;
size_t buf_sectors;
+ int num_coroutines;
+ int running_coroutines;
+ Coroutine *co[MAX_COROUTINES];
+ int64_t wait_sector_num[MAX_COROUTINES];
+ CoMutex lock;
+ int ret;
} ImgConvertState;
-static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+static void convert_select_part(ImgConvertState *s, int64_t sector_num,
+ int *src_cur, int64_t *src_cur_offset)
{
- assert(sector_num >= s->src_cur_offset);
- while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
- s->src_cur_offset += s->src_sectors[s->src_cur];
- s->src_cur++;
- assert(s->src_cur < s->src_num);
+ *src_cur = 0;
+ *src_cur_offset = 0;
+ while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
+ *src_cur_offset += s->src_sectors[*src_cur];
+ (*src_cur)++;
+ assert(*src_cur < s->src_num);
}
}
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
- int64_t ret;
- int n;
+ int64_t ret, src_cur_offset;
+ int n, src_cur;
- convert_select_part(s, sector_num);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
assert(s->total_sectors > sector_num);
n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (s->sector_next_status <= sector_num) {
BlockDriverState *file;
- ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
/* Check block status of the backing file chain to avoid
* needlessly reading zeroes and limiting the iteration to the
* buffer size */
- ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
return n;
}
-static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- uint8_t *buf)
+static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf)
{
- int n;
- int ret;
+ int n, ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
assert(nb_sectors <= s->buf_sectors);
while (nb_sectors > 0) {
BlockBackend *blk;
- int64_t bs_sectors;
+ int src_cur;
+ int64_t bs_sectors, src_cur_offset;
/* In the case of compression with multiple source files, we can get a
* nb_sectors that spreads into the next part. So we must be able to
* read across multiple BDSes for one convert_read() call. */
- convert_select_part(s, sector_num);
- blk = s->src[s->src_cur];
- bs_sectors = s->src_sectors[s->src_cur];
-
- n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
- ret = blk_pread(blk,
- (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+ blk = s->src[src_cur];
+ bs_sectors = s->src_sectors[src_cur];
+
+ n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_preadv(
+ blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- const uint8_t *buf)
+
+static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf,
+ enum ImgConvertBlockStatus status)
{
int ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
while (nb_sectors > 0) {
int n = nb_sectors;
-
- switch (s->status) {
+ switch (status) {
case BLK_BACKING_FILE:
/* If we have a backing file, leave clusters unallocated that are
* unallocated in the source image, so that the backing file is
@@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
break;
}
- ret = blk_pwrite_compressed(s->target,
- sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov,
+ BDRV_REQ_WRITE_COMPRESSED);
if (ret < 0) {
return ret;
}
@@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (!s->min_sparse ||
is_allocated_sectors_min(buf, n, &n, s->min_sparse))
{
- ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS, 0);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (s->has_zero_init) {
break;
}
- ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
- n << BDRV_SECTOR_BITS, 0);
+ ret = blk_co_pwrite_zeroes(s->target,
+ sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
return ret;
}
@@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_do_copy(ImgConvertState *s)
+static void coroutine_fn convert_co_do_copy(void *opaque)
{
+ ImgConvertState *s = opaque;
uint8_t *buf = NULL;
- int64_t sector_num, allocated_done;
- int ret;
- int n;
+ int ret, i;
+ int index = -1;
+
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] == qemu_coroutine_self()) {
+ index = i;
+ break;
+ }
+ }
+ assert(index >= 0);
+
+ s->running_coroutines++;
+ buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+ while (1) {
+ int n;
+ int64_t sector_num;
+ enum ImgConvertBlockStatus status;
+
+ qemu_co_mutex_lock(&s->lock);
+ if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
+ qemu_co_mutex_unlock(&s->lock);
+ goto out;
+ }
+ n = convert_iteration_sectors(s, s->sector_num);
+ if (n < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ s->ret = n;
+ goto out;
+ }
+ /* save current sector and allocation status to local variables */
+ sector_num = s->sector_num;
+ status = s->status;
+ if (!s->min_sparse && s->status == BLK_ZERO) {
+ n = MIN(n, s->buf_sectors);
+ }
+ /* increment global sector counter so that other coroutines can
+ * already continue reading beyond this request */
+ s->sector_num += n;
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
+ s->allocated_done += n;
+ qemu_progress_print(100.0 * s->allocated_done /
+ s->allocated_sectors, 0);
+ }
+
+ if (status == BLK_DATA) {
+ ret = convert_co_read(s, sector_num, n, buf);
+ if (ret < 0) {
+ error_report("error while reading sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+ } else if (!s->min_sparse && status == BLK_ZERO) {
+ status = BLK_DATA;
+ memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
+ }
+
+ if (s->wr_in_order) {
+ /* keep writes in order */
+ while (s->wr_offs != sector_num) {
+ if (s->ret != -EINPROGRESS) {
+ goto out;
+ }
+ s->wait_sector_num[index] = sector_num;
+ qemu_coroutine_yield();
+ }
+ s->wait_sector_num[index] = -1;
+ }
+
+ ret = convert_co_write(s, sector_num, n, buf, status);
+ if (ret < 0) {
+ error_report("error while writing sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+
+ if (s->wr_in_order) {
+ /* reenter the coroutine that might have waited
+ * for this write to complete */
+ s->wr_offs = sector_num + n;
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
+ /*
+ * A -> B -> A cannot occur because A has
+ * s->wait_sector_num[i] == -1 during A -> B. Therefore
+ * B will never enter A during this time window.
+ */
+ qemu_coroutine_enter(s->co[i]);
+ break;
+ }
+ }
+ }
+ }
+
+out:
+ qemu_vfree(buf);
+ s->co[index] = NULL;
+ s->running_coroutines--;
+ if (!s->running_coroutines && s->ret == -EINPROGRESS) {
+ /* the convert job finished successfully */
+ s->ret = 0;
+ }
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+ int ret, i, n;
+ int64_t sector_num = 0;
/* Check whether we have zero initialisation or can get it efficiently */
s->has_zero_init = s->min_sparse && !s->target_has_backing
@@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s)
if (s->compressed) {
if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
error_report("invalid cluster size");
- ret = -EINVAL;
- goto fail;
+ return -EINVAL;
}
s->buf_sectors = s->cluster_sectors;
}
- buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
- /* Calculate allocated sectors for progress */
- s->allocated_sectors = 0;
- sector_num = 0;
while (sector_num < s->total_sectors) {
n = convert_iteration_sectors(s, sector_num);
if (n < 0) {
- ret = n;
- goto fail;
+ return n;
}
if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
{
@@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s)
}
/* Do the copy */
- s->src_cur = 0;
- s->src_cur_offset = 0;
s->sector_next_status = 0;
+ s->ret = -EINPROGRESS;
- sector_num = 0;
- allocated_done = 0;
-
- while (sector_num < s->total_sectors) {
- n = convert_iteration_sectors(s, sector_num);
- if (n < 0) {
- ret = n;
- goto fail;
- }
- if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
- {
- allocated_done += n;
- qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
- 0);
- }
-
- if (s->status == BLK_DATA) {
- ret = convert_read(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while reading sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
- } else if (!s->min_sparse && s->status == BLK_ZERO) {
- n = MIN(n, s->buf_sectors);
- memset(buf, 0, n * BDRV_SECTOR_SIZE);
- s->status = BLK_DATA;
- }
-
- ret = convert_write(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while writing sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
+ qemu_co_mutex_init(&s->lock);
+ for (i = 0; i < s->num_coroutines; i++) {
+ s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
+ s->wait_sector_num[i] = -1;
+ qemu_coroutine_enter(s->co[i]);
+ }
- sector_num += n;
+ while (s->ret == -EINPROGRESS) {
+ main_loop_wait(false);
}
- if (s->compressed) {
+ if (s->compressed && !s->ret) {
/* signal EOF to align */
ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
if (ret < 0) {
- goto fail;
+ return ret;
}
}
- ret = 0;
-fail:
- qemu_vfree(buf);
- return ret;
+ return s->ret;
}
static int img_convert(int argc, char **argv)
@@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv)
QemuOpts *sn_opts = NULL;
ImgConvertState state;
bool image_opts = false;
+ bool wr_in_order = true;
+ long num_coroutines = 8;
fmt = NULL;
out_fmt = "raw";
@@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv)
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
- c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
+ c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
long_options, NULL);
if (c == -1) {
break;
@@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv)
case 'n':
skip_create = 1;
break;
+ case 'm':
+ if (qemu_strtol(optarg, NULL, 0, &num_coroutines) ||
+ num_coroutines < 1 || num_coroutines > MAX_COROUTINES) {
+ error_report("Invalid number of coroutines. Allowed number of"
+ " coroutines is between 1 and %d", MAX_COROUTINES);
+ ret = -1;
+ goto fail_getopt;
+ }
+ break;
+ case 'W':
+ wr_in_order = false;
+ break;
case OPTION_OBJECT:
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
@@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv)
goto fail_getopt;
}
+ if (!wr_in_order && compress) {
+ error_report("Out of order write and compress are mutually exclusive");
+ ret = -1;
+ goto fail_getopt;
+ }
+
/* Initialize before goto out */
if (quiet) {
progress = 0;
@@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv)
.min_sparse = min_sparse,
.cluster_sectors = cluster_sectors,
.buf_sectors = bufsectors,
+ .wr_in_order = wr_in_order,
+ .num_coroutines = num_coroutines,
};
ret = convert_do_copy(&state);
@@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv)
qemu_opts_del(param);
blk = img_open(image_opts, filename, fmt,
- BDRV_O_RDWR, false, quiet);
+ BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet);
if (!blk) {
ret = -1;
goto out;
diff --git a/qemu-img.texi b/qemu-img.texi
index 174aae3..c81db3e 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -137,6 +137,12 @@ Parameters to convert subcommand:
@item -n
Skip the creation of the target volume
+@item -m
+Number of parallel coroutines for the convert process
+@item -W
+Allow out-of-order writes to the destination. This option improves performance,
+but is only recommended for preallocated devices like host devices or other
+raw block devices.
@end table
Parameters to dd subcommand:
@@ -296,7 +302,7 @@ Error on reading data
@end table
-@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated)
to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target
volume has already been created with site specific options that cannot
be supplied through qemu-img.
+Out of order writes can be enabled with @code{-W} to improve performance.
+This is only recommended for preallocated devices like host devices or other
+raw block devices. Out of order write does not work in combination with
+creating compressed images.
+
+@var{num_coroutines} specifies how many coroutines work in parallel during
+the convert process (defaults to 8).
+
@item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
Dd copies from @var{input} file to @var{output} file converting it from
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 7ac1576..2c48f9c 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
}
return 0;
}
+
+ /* Request additional permissions if necessary for this command. The caller
+ * is responsible for restoring the original permissions afterwards if this
+ * is what it wants. */
+ if (ct->perm && blk_is_available(blk)) {
+ uint64_t orig_perm, orig_shared_perm;
+ blk_get_perm(blk, &orig_perm, &orig_shared_perm);
+
+ if (ct->perm & ~orig_perm) {
+ uint64_t new_perm;
+ Error *local_err = NULL;
+ int ret;
+
+ new_perm = orig_perm | ct->perm;
+
+ ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ return 0;
+ }
+ }
+ }
+
optind = 0;
return ct->cfunc(blk, argc, argv);
}
@@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = {
.name = "write",
.altname = "w",
.cfunc = write_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-bcCfquz] [-P pattern] off len",
@@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv);
static const cmdinfo_t writev_cmd = {
.name = "writev",
.cfunc = writev_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cfq] [-P pattern] off len [len..]",
@@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv);
static const cmdinfo_t aio_write_cmd = {
.name = "aio_write",
.cfunc = aio_write_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cfiquz] [-P pattern] off len [len..]",
@@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = {
.name = "truncate",
.altname = "t",
.cfunc = truncate_f,
+ .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE,
.argmin = 1,
.argmax = 1,
.args = "off",
@@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = {
.name = "discard",
.altname = "d",
.cfunc = discard_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cq] off len",
diff --git a/qtest.c b/qtest.c
index a685827..5aa6636 100644
--- a/qtest.c
+++ b/qtest.c
@@ -240,6 +240,7 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr,
va_start(ap, fmt);
buffer = g_strdup_vprintf(fmt, ap);
qtest_send(chr, buffer);
+ g_free(buffer);
va_end(ap);
}
diff --git a/roms/openbios b/roms/openbios
-Subproject ef8a14e8afb47635c9c5f7524a52c3251827e29
+Subproject 0cd97cc904e71fbb461112f6756934ec6af890b
diff --git a/roms/seabios b/roms/seabios
-Subproject 8891697e3f7d84355420573efd98e94f1473676
+Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 72cf1fb..6a370a8 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -75,7 +75,13 @@ for arch in $ARCHLIST; do
continue
fi
- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install
+ if [ "$arch" = x86 ]; then
+ arch_var=SRCARCH
+ else
+ arch_var=ARCH
+ fi
+
+ make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
rm -rf "$output/linux-headers/asm-$arch"
mkdir -p "$output/linux-headers/asm-$arch"
@@ -92,6 +98,11 @@ for arch in $ARCHLIST; do
cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/"
cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
fi
+ if [ $arch = arm ]; then
+ cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+ cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+ cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
+ fi
if [ $arch = x86 ]; then
cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/"
cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index 14a27e7..bcef7ee 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field):
'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
'br.dev.exp.aer_log',
'parent_obj.parent_obj.exp.aer_log'],
+ 'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+ 'mem_win_addr', 'mig_mem_win_addr',
+ 'mem_win_size', 'mig_mem_win_size',
+ 'io_win_addr', 'mig_io_win_addr',
+ 'io_win_size', 'mig_io_win_size'],
}
if not name in changed_names:
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index bbe158f..6d52f29 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,6 +1,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "migration/vmstate.h"
+#include "migration/migration.h"
const VMStateDescription vmstate_dummy = {};
@@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev,
void *opaque)
{
}
+
+int check_migratable(Object *obj, Error **err)
+{
+ return 0;
+}
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9e7b2df..25ceaab 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -521,6 +521,8 @@ typedef struct CPUARMState {
void *nvic;
const struct arm_boot_info *boot_info;
+ /* Store GICv3CPUState to access from this struct */
+ void *gicv3state;
} CPUARMState;
/**
diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h
index 8cd607e..c2205e6 100644
--- a/target/i386/cpu-qom.h
+++ b/target/i386/cpu-qom.h
@@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition;
* X86CPUClass:
* @cpu_def: CPU model definition
* @kvm_required: Whether CPU model requires KVM to be enabled.
+ * @ordering: Ordering on the "-cpu help" CPU model list.
* @migration_safe: See CpuDefinitionInfo::migration_safe
+ * @static_model: See CpuDefinitionInfo::static
* @parent_realize: The parent class' realize handler.
* @parent_reset: The parent class' reset handler.
*
@@ -59,11 +61,15 @@ typedef struct X86CPUClass {
CPUClass parent_class;
/*< public >*/
- /* Should be eventually replaced by subclass-specific property defaults. */
+ /* CPU definition, automatically loaded by instance_init if not NULL.
+ * Should be eventually replaced by subclass-specific property defaults.
+ */
X86CPUDefinition *cpu_def;
bool kvm_required;
+ int ordering;
bool migration_safe;
+ bool static_model;
/* Optional description of CPU model.
* If unavailable, cpu_def->model_id is used */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b6f157d..89421c8 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -29,10 +29,16 @@
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
#include "qapi-types.h"
#include "qapi-visit.h"
#include "qapi/visitor.h"
+#include "qom/qom-qobject.h"
#include "sysemu/arch_init.h"
#if defined(CONFIG_KVM)
@@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value)
static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
bool migratable_only);
-#ifdef CONFIG_KVM
-
static bool lmce_supported(void)
{
- uint64_t mce_cap;
+ uint64_t mce_cap = 0;
+#ifdef CONFIG_KVM
if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
return false;
}
+#endif
return !!(mce_cap & MCG_LMCE_P);
}
@@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str)
return 0;
}
-static X86CPUDefinition host_cpudef;
-
-static Property host_x86_cpu_properties[] = {
+static Property max_x86_cpu_properties[] = {
DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false),
DEFINE_PROP_END_OF_LIST()
};
-/* class_init for the "host" CPU model
- *
- * This function may be called before KVM is initialized.
- */
-static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+static void max_x86_cpu_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
X86CPUClass *xcc = X86_CPU_CLASS(oc);
- uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
-
- xcc->kvm_required = true;
-
- host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
-
- host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
- host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
- host_cpudef.stepping = eax & 0x0F;
- cpu_x86_fill_model_id(host_cpudef.model_id);
+ xcc->ordering = 9;
- xcc->cpu_def = &host_cpudef;
xcc->model_description =
- "KVM processor with all supported host features "
- "(only available in KVM mode)";
-
- /* level, xlevel, xlevel2, and the feature words are initialized on
- * instance_init, because they require KVM to be initialized.
- */
+ "Enables all features supported by the accelerator in the current host";
- dc->props = host_x86_cpu_properties;
- /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */
- dc->cannot_destroy_with_object_finalize_yet = true;
+ dc->props = max_x86_cpu_properties;
}
-static void host_x86_cpu_initfn(Object *obj)
+static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp);
+
+static void max_x86_cpu_initfn(Object *obj)
{
X86CPU *cpu = X86_CPU(obj);
CPUX86State *env = &cpu->env;
@@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj)
/* We can't fill the features array here because we don't know yet if
* "migratable" is true or false.
*/
- cpu->host_features = true;
+ cpu->max_features = true;
- /* If KVM is disabled, x86_cpu_realizefn() will report an error later */
if (kvm_enabled()) {
+ X86CPUDefinition host_cpudef = { };
+ uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
+ x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
+
+ host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
+ host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
+ host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
+ host_cpudef.stepping = eax & 0x0F;
+
+ cpu_x86_fill_model_id(host_cpudef.model_id);
+
+ x86_cpu_load_def(cpu, &host_cpudef, &error_abort);
+
env->cpuid_min_level =
kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
env->cpuid_min_xlevel =
@@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj)
if (lmce_supported()) {
object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort);
}
+ } else {
+ object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD,
+ "vendor", &error_abort);
+ object_property_set_int(OBJECT(cpu), 6, "family", &error_abort);
+ object_property_set_int(OBJECT(cpu), 6, "model", &error_abort);
+ object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort);
+ object_property_set_str(OBJECT(cpu),
+ "QEMU TCG CPU version " QEMU_HW_VERSION,
+ "model-id", &error_abort);
}
object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
}
+static const TypeInfo max_x86_cpu_type_info = {
+ .name = X86_CPU_TYPE_NAME("max"),
+ .parent = TYPE_X86_CPU,
+ .instance_init = max_x86_cpu_initfn,
+ .class_init = max_x86_cpu_class_init,
+};
+
+#ifdef CONFIG_KVM
+
+static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+{
+ X86CPUClass *xcc = X86_CPU_CLASS(oc);
+
+ xcc->kvm_required = true;
+ xcc->ordering = 8;
+
+ xcc->model_description =
+ "KVM processor with all supported host features "
+ "(only available in KVM mode)";
+}
+
static const TypeInfo host_x86_cpu_type_info = {
.name = X86_CPU_TYPE_NAME("host"),
- .parent = TYPE_X86_CPU,
- .instance_init = host_x86_cpu_initfn,
+ .parent = X86_CPU_TYPE_NAME("max"),
.class_init = host_x86_cpu_class_init,
};
@@ -2060,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features,
}
}
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp);
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp);
static int x86_cpu_filter_features(X86CPU *cpu);
/* Check for missing features that may prevent the CPU class from
@@ -2083,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc,
xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
- x86_cpu_load_features(xc, &err);
+ x86_cpu_expand_features(xc, &err);
if (err) {
- /* Errors at x86_cpu_load_features should never happen,
+ /* Errors at x86_cpu_expand_features should never happen,
* but in case it does, just report the model as not
* runnable at all using the "type" property.
*/
@@ -2128,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset)
}
}
-/* Sort alphabetically by type name, listing kvm_required models last. */
+/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */
static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
{
ObjectClass *class_a = (ObjectClass *)a;
@@ -2137,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
X86CPUClass *cc_b = X86_CPU_CLASS(class_b);
const char *name_a, *name_b;
- if (cc_a->kvm_required != cc_b->kvm_required) {
- /* kvm_required items go last */
- return cc_a->kvm_required ? 1 : -1;
+ if (cc_a->ordering != cc_b->ordering) {
+ return cc_a->ordering - cc_b->ordering;
} else {
name_a = object_class_get_name(class_a);
name_b = object_class_get_name(class_b);
@@ -2161,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data)
CPUListState *s = user_data;
char *name = x86_cpu_class_get_model_name(cc);
const char *desc = cc->model_description;
- if (!desc) {
+ if (!desc && cc->cpu_def) {
desc = cc->cpu_def->model_id;
}
@@ -2210,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data)
info->q_typename = g_strdup(object_class_get_name(oc));
info->migration_safe = cc->migration_safe;
info->has_migration_safe = true;
+ info->q_static = cc->static_model;
entry = g_malloc0(sizeof(*entry));
entry->value = info;
@@ -2247,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
return r;
}
-/*
- * Filters CPU feature words based on host availability of each feature.
- *
- * Returns: 0 if all flags are supported by the host, non-zero otherwise.
- */
-static int x86_cpu_filter_features(X86CPU *cpu)
-{
- CPUX86State *env = &cpu->env;
- FeatureWord w;
- int rv = 0;
-
- for (w = 0; w < FEATURE_WORDS; w++) {
- uint32_t host_feat =
- x86_cpu_get_supported_feature_word(w, false);
- uint32_t requested_features = env->features[w];
- env->features[w] &= host_feat;
- cpu->filtered_features[w] = requested_features & ~env->features[w];
- if (cpu->filtered_features[w]) {
- rv = 1;
- }
- }
-
- return rv;
-}
-
static void x86_cpu_report_filtered_features(X86CPU *cpu)
{
FeatureWord w;
@@ -2293,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
}
}
-/* Load data from X86CPUDefinition
+/* Load data from X86CPUDefinition into a X86CPU object
*/
static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
{
@@ -2302,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
char host_vendor[CPUID_VENDOR_SZ + 1];
FeatureWord w;
+ /*NOTE: any property set by this function should be returned by
+ * x86_cpu_static_props(), so static expansion of
+ * query-cpu-model-expansion is always complete.
+ */
+
/* CPU models only set _minimum_ values for level/xlevel: */
object_property_set_int(OBJECT(cpu), def->level, "min-level", errp);
object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp);
@@ -2346,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
}
+/* Return a QDict containing keys for all properties that can be included
+ * in static expansion of CPU models. All properties set by x86_cpu_load_def()
+ * must be included in the dictionary.
+ */
+static QDict *x86_cpu_static_props(void)
+{
+ FeatureWord w;
+ int i;
+ static const char *props[] = {
+ "min-level",
+ "min-xlevel",
+ "family",
+ "model",
+ "stepping",
+ "model-id",
+ "vendor",
+ "lmce",
+ NULL,
+ };
+ static QDict *d;
+
+ if (d) {
+ return d;
+ }
+
+ d = qdict_new();
+ for (i = 0; props[i]; i++) {
+ qdict_put_obj(d, props[i], qnull());
+ }
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ FeatureWordInfo *fi = &feature_word_info[w];
+ int bit;
+ for (bit = 0; bit < 32; bit++) {
+ if (!fi->feat_names[bit]) {
+ continue;
+ }
+ qdict_put_obj(d, fi->feat_names[bit], qnull());
+ }
+ }
+
+ return d;
+}
+
+/* Add an entry to @props dict, with the value for property. */
+static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop)
+{
+ QObject *value = object_property_get_qobject(OBJECT(cpu), prop,
+ &error_abort);
+
+ qdict_put_obj(props, prop, value);
+}
+
+/* Convert CPU model data from X86CPU object to a property dictionary
+ * that can recreate exactly the same CPU model.
+ */
+static void x86_cpu_to_dict(X86CPU *cpu, QDict *props)
+{
+ QDict *sprops = x86_cpu_static_props();
+ const QDictEntry *e;
+
+ for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) {
+ const char *prop = qdict_entry_key(e);
+ x86_cpu_expand_prop(cpu, props, prop);
+ }
+}
+
+/* Convert CPU model data from X86CPU object to a property dictionary
+ * that can recreate exactly the same CPU model, including every
+ * writeable QOM property.
+ */
+static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props)
+{
+ ObjectPropertyIterator iter;
+ ObjectProperty *prop;
+
+ object_property_iter_init(&iter, OBJECT(cpu));
+ while ((prop = object_property_iter_next(&iter))) {
+ /* skip read-only or write-only properties */
+ if (!prop->get || !prop->set) {
+ continue;
+ }
+
+ /* "hotplugged" is the only property that is configurable
+ * on the command-line but will be set differently on CPUs
+ * created using "-cpu ... -smp ..." and by CPUs created
+ * on the fly by x86_cpu_from_model() for querying. Skip it.
+ */
+ if (!strcmp(prop->name, "hotplugged")) {
+ continue;
+ }
+ x86_cpu_expand_prop(cpu, props, prop->name);
+ }
+}
+
+static void object_apply_props(Object *obj, QDict *props, Error **errp)
+{
+ const QDictEntry *prop;
+ Error *err = NULL;
+
+ for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) {
+ object_property_set_qobject(obj, qdict_entry_value(prop),
+ qdict_entry_key(prop), &err);
+ if (err) {
+ break;
+ }
+ }
+
+ error_propagate(errp, err);
+}
+
+/* Create X86CPU object according to model+props specification */
+static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp)
+{
+ X86CPU *xc = NULL;
+ X86CPUClass *xcc;
+ Error *err = NULL;
+
+ xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model));
+ if (xcc == NULL) {
+ error_setg(&err, "CPU model '%s' not found", model);
+ goto out;
+ }
+
+ xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
+ if (props) {
+ object_apply_props(OBJECT(xc), props, &err);
+ if (err) {
+ goto out;
+ }
+ }
+
+ x86_cpu_expand_features(xc, &err);
+ if (err) {
+ goto out;
+ }
+
+out:
+ if (err) {
+ error_propagate(errp, err);
+ object_unref(OBJECT(xc));
+ xc = NULL;
+ }
+ return xc;
+}
+
+CpuModelExpansionInfo *
+arch_query_cpu_model_expansion(CpuModelExpansionType type,
+ CpuModelInfo *model,
+ Error **errp)
+{
+ X86CPU *xc = NULL;
+ Error *err = NULL;
+ CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1);
+ QDict *props = NULL;
+ const char *base_name;
+
+ xc = x86_cpu_from_model(model->name,
+ model->has_props ?
+ qobject_to_qdict(model->props) :
+ NULL, &err);
+ if (err) {
+ goto out;
+ }
+
+ props = qdict_new();
+
+ switch (type) {
+ case CPU_MODEL_EXPANSION_TYPE_STATIC:
+ /* Static expansion will be based on "base" only */
+ base_name = "base";
+ x86_cpu_to_dict(xc, props);
+ break;
+ case CPU_MODEL_EXPANSION_TYPE_FULL:
+ /* As we don't return every single property, full expansion needs
+ * to keep the original model name+props, and add extra
+ * properties on top of that.
+ */
+ base_name = model->name;
+ x86_cpu_to_dict_full(xc, props);
+ break;
+ default:
+ error_setg(&err, "Unsupportted expansion type");
+ goto out;
+ }
+
+ if (!props) {
+ props = qdict_new();
+ }
+ x86_cpu_to_dict(xc, props);
+
+ ret->model = g_new0(CpuModelInfo, 1);
+ ret->model->name = g_strdup(base_name);
+ ret->model->props = QOBJECT(props);
+ ret->model->has_props = true;
+
+out:
+ object_unref(OBJECT(xc));
+ if (err) {
+ error_propagate(errp, err);
+ qapi_free_CpuModelExpansionInfo(ret);
+ ret = NULL;
+ }
+ return ret;
+}
+
X86CPU *cpu_x86_init(const char *cpu_model)
{
return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model));
@@ -3095,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
}
-/* Load CPUID data based on configured features */
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp)
+/***** Steps involved on loading and filtering CPUID data
+ *
+ * When initializing and realizing a CPU object, the steps
+ * involved in setting up CPUID data are:
+ *
+ * 1) Loading CPU model definition (X86CPUDefinition). This is
+ * implemented by x86_cpu_load_def() and should be completely
+ * transparent, as it is done automatically by instance_init.
+ * No code should need to look at X86CPUDefinition structs
+ * outside instance_init.
+ *
+ * 2) CPU expansion. This is done by realize before CPUID
+ * filtering, and will make sure host/accelerator data is
+ * loaded for CPU models that depend on host capabilities
+ * (e.g. "host"). Done by x86_cpu_expand_features().
+ *
+ * 3) CPUID filtering. This initializes extra data related to
+ * CPUID, and checks if the host supports all capabilities
+ * required by the CPU. Runnability of a CPU model is
+ * determined at this step. Done by x86_cpu_filter_features().
+ *
+ * Some operations don't require all steps to be performed.
+ * More precisely:
+ *
+ * - CPU instance creation (instance_init) will run only CPU
+ * model loading. CPU expansion can't run at instance_init-time
+ * because host/accelerator data may be not available yet.
+ * - CPU realization will perform both CPU model expansion and CPUID
+ * filtering, and return an error in case one of them fails.
+ * - query-cpu-definitions needs to run all 3 steps. It needs
+ * to run CPUID filtering, as the 'unavailable-features'
+ * field is set based on the filtering results.
+ * - The query-cpu-model-expansion QMP command only needs to run
+ * CPU model loading and CPU expansion. It should not filter
+ * any CPUID data based on host capabilities.
+ */
+
+/* Expand CPU configuration data, based on configured features
+ * and host/accelerator capabilities when appropriate.
+ */
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
{
CPUX86State *env = &cpu->env;
FeatureWord w;
GList *l;
Error *local_err = NULL;
- /*TODO: cpu->host_features incorrectly overwrites features
+ /*TODO: cpu->max_features incorrectly overwrites features
* set using "feat=on|off". Once we fix this, we can convert
* plus_features & minus_features to global properties
* inside x86_cpu_parse_featurestr() too.
*/
- if (cpu->host_features) {
+ if (cpu->max_features) {
for (w = 0; w < FEATURE_WORDS; w++) {
env->features[w] =
x86_cpu_get_supported_feature_word(w, cpu->migratable);
@@ -3173,6 +3424,32 @@ out:
}
}
+/*
+ * Finishes initialization of CPUID data, filters CPU feature
+ * words based on host availability of each feature.
+ *
+ * Returns: 0 if all flags are supported by the host, non-zero otherwise.
+ */
+static int x86_cpu_filter_features(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ FeatureWord w;
+ int rv = 0;
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ uint32_t host_feat =
+ x86_cpu_get_supported_feature_word(w, false);
+ uint32_t requested_features = env->features[w];
+ env->features[w] &= host_feat;
+ cpu->filtered_features[w] = requested_features & ~env->features[w];
+ if (cpu->filtered_features[w]) {
+ rv = 1;
+ }
+ }
+
+ return rv;
+}
+
#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3)
@@ -3200,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
- x86_cpu_load_features(cpu, &local_err);
+ x86_cpu_expand_features(cpu, &local_err);
if (local_err) {
goto out;
}
@@ -3619,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj)
object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort);
object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort);
- x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+ if (xcc->cpu_def) {
+ x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+ }
}
static int64_t x86_cpu_get_arch_id(CPUState *cs)
@@ -3774,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = {
.class_init = x86_cpu_common_class_init,
};
+
+/* "base" CPU model, used by query-cpu-model-expansion */
+static void x86_cpu_base_class_init(ObjectClass *oc, void *data)
+{
+ X86CPUClass *xcc = X86_CPU_CLASS(oc);
+
+ xcc->static_model = true;
+ xcc->migration_safe = true;
+ xcc->model_description = "base CPU model type with no features enabled";
+ xcc->ordering = 8;
+}
+
+static const TypeInfo x86_base_cpu_type_info = {
+ .name = X86_CPU_TYPE_NAME("base"),
+ .parent = TYPE_X86_CPU,
+ .class_init = x86_cpu_base_class_init,
+};
+
static void x86_cpu_register_types(void)
{
int i;
@@ -3782,6 +4079,8 @@ static void x86_cpu_register_types(void)
for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
x86_register_cpudef_type(&builtin_x86_defs[i]);
}
+ type_register_static(&max_x86_cpu_type_info);
+ type_register_static(&x86_base_cpu_type_info);
#ifdef CONFIG_KVM
type_register_static(&host_x86_cpu_type_info);
#endif
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 573f2aa..12a39d5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1211,7 +1211,7 @@ struct X86CPU {
bool enforce_cpuid;
bool expose_kvm;
bool migratable;
- bool host_features;
+ bool max_features; /* Enable all supported features automatically */
uint32_t apic_id;
/* Enables publishing of TSC increment and Local APIC bus frequencies to
diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs
index a8c7a30..0057b31 100644
--- a/target/ppc/Makefile.objs
+++ b/target/ppc/Makefile.objs
@@ -1,8 +1,9 @@
obj-y += cpu-models.o
+obj-y += cpu.o
obj-y += translate.o
ifeq ($(CONFIG_SOFTMMU),y)
-obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o
-obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o compat.o
+obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o
+obj-$(TARGET_PPC64) += mmu-hash64.o compat.o
endif
obj-$(CONFIG_KVM) += kvm.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 40282a1..28d9cc7 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -1,5 +1,5 @@
/*
- * writing ELF notes for ppc64 arch
+ * writing ELF notes for ppc{64,} arch
*
*
* Copyright IBM, Corp. 2013
@@ -19,36 +19,48 @@
#include "sysemu/dump.h"
#include "sysemu/kvm.h"
-struct PPC64UserRegStruct {
- uint64_t gpr[32];
- uint64_t nip;
- uint64_t msr;
- uint64_t orig_gpr3;
- uint64_t ctr;
- uint64_t link;
- uint64_t xer;
- uint64_t ccr;
- uint64_t softe;
- uint64_t trap;
- uint64_t dar;
- uint64_t dsisr;
- uint64_t result;
+#ifdef TARGET_PPC64
+#define ELFCLASS ELFCLASS64
+#define cpu_to_dump_reg cpu_to_dump64
+typedef uint64_t reg_t;
+typedef Elf64_Nhdr Elf_Nhdr;
+#else
+#define ELFCLASS ELFCLASS32
+#define cpu_to_dump_reg cpu_to_dump32
+typedef uint32_t reg_t;
+typedef Elf32_Nhdr Elf_Nhdr;
+#endif /* TARGET_PPC64 */
+
+struct PPCUserRegStruct {
+ reg_t gpr[32];
+ reg_t nip;
+ reg_t msr;
+ reg_t orig_gpr3;
+ reg_t ctr;
+ reg_t link;
+ reg_t xer;
+ reg_t ccr;
+ reg_t softe;
+ reg_t trap;
+ reg_t dar;
+ reg_t dsisr;
+ reg_t result;
} QEMU_PACKED;
-struct PPC64ElfPrstatus {
+struct PPCElfPrstatus {
char pad1[112];
- struct PPC64UserRegStruct pr_reg;
- uint64_t pad2[4];
+ struct PPCUserRegStruct pr_reg;
+ reg_t pad2[4];
} QEMU_PACKED;
-struct PPC64ElfFpregset {
+struct PPCElfFpregset {
uint64_t fpr[32];
- uint64_t fpscr;
+ reg_t fpscr;
} QEMU_PACKED;
-struct PPC64ElfVmxregset {
+struct PPCElfVmxregset {
ppc_avr_t avr[32];
ppc_avr_t vscr;
union {
@@ -57,26 +69,26 @@ struct PPC64ElfVmxregset {
} vrsave;
} QEMU_PACKED;
-struct PPC64ElfVsxregset {
+struct PPCElfVsxregset {
uint64_t vsr[32];
} QEMU_PACKED;
-struct PPC64ElfSperegset {
+struct PPCElfSperegset {
uint32_t evr[32];
uint64_t spe_acc;
uint32_t spe_fscr;
} QEMU_PACKED;
typedef struct noteStruct {
- Elf64_Nhdr hdr;
+ Elf_Nhdr hdr;
char name[5];
char pad3[3];
union {
- struct PPC64ElfPrstatus prstatus;
- struct PPC64ElfFpregset fpregset;
- struct PPC64ElfVmxregset vmxregset;
- struct PPC64ElfVsxregset vsxregset;
- struct PPC64ElfSperegset speregset;
+ struct PPCElfPrstatus prstatus;
+ struct PPCElfFpregset fpregset;
+ struct PPCElfVmxregset vmxregset;
+ struct PPCElfVsxregset vsxregset;
+ struct PPCElfSperegset speregset;
} contents;
} QEMU_PACKED Note;
@@ -85,12 +97,12 @@ typedef struct NoteFuncArg {
DumpState *state;
} NoteFuncArg;
-static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- uint64_t cr;
- struct PPC64ElfPrstatus *prstatus;
- struct PPC64UserRegStruct *reg;
+ reg_t cr;
+ struct PPCElfPrstatus *prstatus;
+ struct PPCUserRegStruct *reg;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -101,25 +113,25 @@ static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
reg = &prstatus->pr_reg;
for (i = 0; i < 32; i++) {
- reg->gpr[i] = cpu_to_dump64(s, cpu->env.gpr[i]);
+ reg->gpr[i] = cpu_to_dump_reg(s, cpu->env.gpr[i]);
}
- reg->nip = cpu_to_dump64(s, cpu->env.nip);
- reg->msr = cpu_to_dump64(s, cpu->env.msr);
- reg->ctr = cpu_to_dump64(s, cpu->env.ctr);
- reg->link = cpu_to_dump64(s, cpu->env.lr);
- reg->xer = cpu_to_dump64(s, cpu_read_xer(&cpu->env));
+ reg->nip = cpu_to_dump_reg(s, cpu->env.nip);
+ reg->msr = cpu_to_dump_reg(s, cpu->env.msr);
+ reg->ctr = cpu_to_dump_reg(s, cpu->env.ctr);
+ reg->link = cpu_to_dump_reg(s, cpu->env.lr);
+ reg->xer = cpu_to_dump_reg(s, cpu_read_xer(&cpu->env));
cr = 0;
for (i = 0; i < 8; i++) {
cr |= (cpu->env.crf[i] & 15) << (4 * (7 - i));
}
- reg->ccr = cpu_to_dump64(s, cr);
+ reg->ccr = cpu_to_dump_reg(s, cr);
}
-static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfFpregset *fpregset;
+ struct PPCElfFpregset *fpregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -131,13 +143,13 @@ static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
for (i = 0; i < 32; i++) {
fpregset->fpr[i] = cpu_to_dump64(s, cpu->env.fpr[i]);
}
- fpregset->fpscr = cpu_to_dump64(s, cpu->env.fpscr);
+ fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr);
}
-static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfVmxregset *vmxregset;
+ struct PPCElfVmxregset *vmxregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -164,10 +176,11 @@ static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
}
vmxregset->vscr.u32[3] = cpu_to_dump32(s, cpu->env.vscr);
}
-static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfVsxregset *vsxregset;
+ struct PPCElfVsxregset *vsxregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -179,9 +192,10 @@ static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
vsxregset->vsr[i] = cpu_to_dump64(s, cpu->env.vsr[i]);
}
}
-static void ppc64_write_elf64_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
- struct PPC64ElfSperegset *speregset;
+ struct PPCElfSperegset *speregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -197,11 +211,11 @@ static const struct NoteFuncDescStruct {
int contents_size;
void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu);
} note_func[] = {
- {sizeof(((Note *)0)->contents.prstatus), ppc64_write_elf64_prstatus},
- {sizeof(((Note *)0)->contents.fpregset), ppc64_write_elf64_fpregset},
- {sizeof(((Note *)0)->contents.vmxregset), ppc64_write_elf64_vmxregset},
- {sizeof(((Note *)0)->contents.vsxregset), ppc64_write_elf64_vsxregset},
- {sizeof(((Note *)0)->contents.speregset), ppc64_write_elf64_speregset},
+ {sizeof(((Note *)0)->contents.prstatus), ppc_write_elf_prstatus},
+ {sizeof(((Note *)0)->contents.fpregset), ppc_write_elf_fpregset},
+ {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset},
+ {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset},
+ {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset},
{ 0, NULL}
};
@@ -213,8 +227,9 @@ int cpu_get_dump_info(ArchDumpInfo *info,
PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
- info->d_machine = EM_PPC64;
- info->d_class = ELFCLASS64;
+ info->d_machine = PPC_ELF_MACHINE;
+ info->d_class = ELFCLASS;
+
if ((*pcc->interrupts_big_endian)(cpu)) {
info->d_endian = ELFDATA2MSB;
} else {
@@ -236,25 +251,19 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
int note_head_size;
const NoteFuncDesc *nf;
- if (class != ELFCLASS64) {
- return -1;
- }
- assert(machine == EM_PPC64);
-
- note_head_size = sizeof(Elf64_Nhdr);
-
+ note_head_size = sizeof(Elf_Nhdr);
for (nf = note_func; nf->note_contents_func; nf++) {
elf_note_size = elf_note_size + note_head_size + name_size +
- nf->contents_size;
+ nf->contents_size;
}
return (elf_note_size) * nr_cpus;
}
-static int ppc64_write_all_elf64_notes(const char *note_name,
- WriteCoreDumpFunction f,
- PowerPCCPU *cpu, int id,
- void *opaque)
+static int ppc_write_all_elf_notes(const char *note_name,
+ WriteCoreDumpFunction f,
+ PowerPCCPU *cpu, int id,
+ void *opaque)
{
NoteFuncArg arg = { .state = opaque };
int ret = -1;
@@ -282,5 +291,12 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- return ppc64_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
+}
+
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+ int cpuid, void *opaque)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
}
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
new file mode 100644
index 0000000..2801166
--- /dev/null
+++ b/target/ppc/cpu.c
@@ -0,0 +1,47 @@
+/*
+ * PowerPC CPU routines for qemu.
+ *
+ * Copyright (c) 2017 Nikunj A Dadhania, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu-models.h"
+
+target_ulong cpu_read_xer(CPUPPCState *env)
+{
+ if (is_isa300(env)) {
+ return env->xer | (env->so << XER_SO) |
+ (env->ov << XER_OV) | (env->ca << XER_CA) |
+ (env->ov32 << XER_OV32) | (env->ca32 << XER_CA32);
+ }
+
+ return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) |
+ (env->ca << XER_CA);
+}
+
+void cpu_write_xer(CPUPPCState *env, target_ulong xer)
+{
+ env->so = (xer >> XER_SO) & 1;
+ env->ov = (xer >> XER_OV) & 1;
+ env->ca = (xer >> XER_CA) & 1;
+ /* write all the flags, while reading back check of isa300 */
+ env->ov32 = (xer >> XER_OV32) & 1;
+ env->ca32 = (xer >> XER_CA32) & 1;
+ env->xer = xer & ~((1ul << XER_SO) |
+ (1ul << XER_OV) | (1ul << XER_CA) |
+ (1ul << XER_OV32) | (1ul << XER_CA32));
+}
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 425e79d..d33c17e 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -223,11 +223,12 @@ enum {
typedef struct opc_handler_t opc_handler_t;
/*****************************************************************************/
-/* Types used to describe some PowerPC registers */
+/* Types used to describe some PowerPC registers etc. */
typedef struct DisasContext DisasContext;
typedef struct ppc_spr_t ppc_spr_t;
typedef union ppc_avr_t ppc_avr_t;
typedef union ppc_tlb_t ppc_tlb_t;
+typedef struct ppc_hash_pte64 ppc_hash_pte64_t;
/* SPR access micro-ops generations callbacks */
struct ppc_spr_t {
@@ -305,14 +306,6 @@ union ppc_tlb_t {
#define TLB_MAS 3
#endif
-#define SDR_32_HTABORG 0xFFFF0000UL
-#define SDR_32_HTABMASK 0x000001FFUL
-
-#if defined(TARGET_PPC64)
-#define SDR_64_HTABORG 0xFFFFFFFFFFFC0000ULL
-#define SDR_64_HTABSIZE 0x000000000000001FULL
-#endif /* defined(TARGET_PPC64 */
-
typedef struct ppc_slb_t ppc_slb_t;
struct ppc_slb_t {
uint64_t esid;
@@ -965,6 +958,8 @@ struct CPUPPCState {
target_ulong so;
target_ulong ov;
target_ulong ca;
+ target_ulong ov32;
+ target_ulong ca32;
/* Reservation address */
target_ulong reserve_addr;
/* Reservation value */
@@ -1005,12 +1000,7 @@ struct CPUPPCState {
/* tcg TLB needs flush (deferred slb inval instruction typically) */
#endif
/* segment registers */
- hwaddr htab_base;
- /* mask used to normalize hash value to PTEG index */
- hwaddr htab_mask;
target_ulong sr[32];
- /* externally stored hash table */
- uint8_t *external_htab;
/* BATs */
uint32_t nb_BATs;
target_ulong DBAT[2][8];
@@ -1218,6 +1208,14 @@ struct PPCVirtualHypervisor {
struct PPCVirtualHypervisorClass {
InterfaceClass parent;
void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
+ hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp);
+ const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp,
+ hwaddr ptex, int n);
+ void (*unmap_hptes)(PPCVirtualHypervisor *vhyp,
+ const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n);
+ void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1);
};
#define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor"
@@ -1243,6 +1241,8 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque);
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+ int cpuid, void *opaque);
#ifndef CONFIG_USER_ONLY
void ppc_cpu_do_system_reset(CPUState *cs);
extern const struct VMStateDescription vmstate_ppc_cpu;
@@ -1300,8 +1300,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val);
void store_booke_tsr (CPUPPCState *env, target_ulong val);
void ppc_tlb_invalidate_all (CPUPPCState *env);
void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
-void cpu_ppc_set_papr(PowerPCCPU *cpu);
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
#endif
#endif
@@ -1372,11 +1371,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu);
#define XER_SO 31
#define XER_OV 30
#define XER_CA 29
+#define XER_OV32 19
+#define XER_CA32 18
#define XER_CMP 8
#define XER_BC 0
#define xer_so (env->so)
#define xer_ov (env->ov)
#define xer_ca (env->ca)
+#define xer_ov32 (env->ov)
+#define xer_ca32 (env->ca)
#define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
#define xer_bc ((env->xer >> XER_BC) & 0x7F)
@@ -2343,18 +2346,9 @@ enum {
/*****************************************************************************/
-static inline target_ulong cpu_read_xer(CPUPPCState *env)
-{
- return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
-}
-
-static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
-{
- env->so = (xer >> XER_SO) & 1;
- env->ov = (xer >> XER_OV) & 1;
- env->ca = (xer >> XER_CA) & 1;
- env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
-}
+#define is_isa300(ctx) (!!(ctx->insns_flags2 & PPC2_ISA300))
+target_ulong cpu_read_xer(CPUPPCState *env);
+void cpu_write_xer(CPUPPCState *env, target_ulong xer);
static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index dd0a892..da4e1a6 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -28,6 +28,15 @@
/*****************************************************************************/
/* Fixed point operations helpers */
+static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
+{
+ if (unlikely(ov)) {
+ env->so = env->ov = 1;
+ } else {
+ env->ov = 0;
+ }
+}
+
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
uint32_t oe)
{
@@ -49,11 +58,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return (target_ulong)rt;
@@ -81,11 +86,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return (target_ulong)rt;
@@ -105,11 +106,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return rt;
@@ -127,12 +124,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
}
if (oe) {
-
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return rt;
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 52bbea5..acc40ec 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1251,7 +1251,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
return ret;
}
- if (!env->external_htab) {
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, sregs.u.s.sdr1);
}
@@ -2596,89 +2596,85 @@ void kvm_arch_init_irq_routing(KVMState *s)
{
}
-struct kvm_get_htab_buf {
- struct kvm_get_htab_header header;
- /*
- * We require one extra byte for read
- */
- target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
-};
-
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
- int htab_fd;
- struct kvm_get_htab_fd ghf;
- struct kvm_get_htab_buf *hpte_buf;
+ struct kvm_get_htab_fd ghf = {
+ .flags = 0,
+ .start_index = ptex,
+ };
+ int fd, rc;
+ int i;
- ghf.flags = 0;
- ghf.start_index = pte_index;
- htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
- if (htab_fd < 0) {
- goto error_out;
+ fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+ if (fd < 0) {
+ hw_error("kvmppc_read_hptes: Unable to open HPT fd");
}
- hpte_buf = g_malloc0(sizeof(*hpte_buf));
- /*
- * Read the hpte group
- */
- if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
- goto out_close;
- }
+ i = 0;
+ while (i < n) {
+ struct kvm_get_htab_header *hdr;
+ int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
+ char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
- close(htab_fd);
- return (uint64_t)(uintptr_t) hpte_buf->hpte;
+ rc = read(fd, buf, sizeof(buf));
+ if (rc < 0) {
+ hw_error("kvmppc_read_hptes: Unable to read HPTEs");
+ }
-out_close:
- g_free(hpte_buf);
- close(htab_fd);
-error_out:
- return 0;
-}
+ hdr = (struct kvm_get_htab_header *)buf;
+ while ((i < n) && ((char *)hdr < (buf + rc))) {
+ int invalid = hdr->n_invalid;
-void kvmppc_hash64_free_pteg(uint64_t token)
-{
- struct kvm_get_htab_buf *htab_buf;
+ if (hdr->index != (ptex + i)) {
+ hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
+ " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
+ }
+
+ memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
+ i += hdr->n_valid;
- htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
- hpte);
- g_free(htab_buf);
- return;
+ if ((n - i) < invalid) {
+ invalid = n - i;
+ }
+ memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
+ i += hdr->n_invalid;
+
+ hdr = (struct kvm_get_htab_header *)
+ ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
+ }
+ }
+
+ close(fd);
}
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
- int htab_fd;
+ int fd, rc;
struct kvm_get_htab_fd ghf;
- struct kvm_get_htab_buf hpte_buf;
+ struct {
+ struct kvm_get_htab_header hdr;
+ uint64_t pte0;
+ uint64_t pte1;
+ } buf;
ghf.flags = 0;
ghf.start_index = 0; /* Ignored */
- htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
- if (htab_fd < 0) {
- goto error_out;
- }
-
- hpte_buf.header.n_valid = 1;
- hpte_buf.header.n_invalid = 0;
- hpte_buf.header.index = pte_index;
- hpte_buf.hpte[0] = pte0;
- hpte_buf.hpte[1] = pte1;
- /*
- * Write the hpte entry.
- * CAUTION: write() has the warn_unused_result attribute. Hence we
- * need to check the return value, even though we do nothing.
- */
- if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
- goto out_close;
+ fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+ if (fd < 0) {
+ hw_error("kvmppc_write_hpte: Unable to open HPT fd");
}
-out_close:
- close(htab_fd);
- return;
+ buf.hdr.n_valid = 1;
+ buf.hdr.n_invalid = 0;
+ buf.hdr.index = ptex;
+ buf.pte0 = cpu_to_be64(pte0);
+ buf.pte1 = cpu_to_be64(pte1);
-error_out:
- return;
+ rc = write(fd, &buf, sizeof(buf));
+ if (rc != sizeof(buf)) {
+ hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
+ }
+ close(fd);
}
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 8da2ee4..8e9f42d 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -49,11 +49,8 @@ int kvmppc_get_htab_fd(bool write);
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
uint16_t n_valid, uint16_t n_invalid);
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index);
-void kvmppc_hash64_free_pteg(uint64_t token);
-
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
- target_ulong pte0, target_ulong pte1);
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n);
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1);
bool kvmppc_has_cap_fixup_hcalls(void);
bool kvmppc_has_cap_htm(void);
int kvmppc_enable_hwrng(void);
@@ -234,20 +231,13 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
abort();
}
-static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
- target_ulong pte_index)
-{
- abort();
-}
-
-static inline void kvmppc_hash64_free_pteg(uint64_t token)
+static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
{
abort();
}
-static inline void kvmppc_hash64_write_pte(CPUPPCState *env,
- target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
abort();
}
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index df9f7a4..6cb3a48 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -76,7 +76,7 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
qemu_get_betls(f, &env->pb[i]);
for (i = 0; i < 1024; i++)
qemu_get_betls(f, &env->spr[i]);
- if (!env->external_htab) {
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, sdr1);
}
qemu_get_be32s(f, &env->vscr);
@@ -228,8 +228,7 @@ static int cpu_post_load(void *opaque, int version_id)
env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1];
}
- if (!env->external_htab) {
- /* Restore htab_base and htab_mask variables */
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, env->spr[SPR_SDR1]);
}
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index ab432ba..fa573dd 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -82,11 +82,9 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val)
{
PowerPCCPU *cpu = ppc_env_get_cpu(env);
- if (!env->external_htab) {
- if (env->spr[SPR_SDR1] != val) {
- ppc_store_sdr1(env, val);
- tlb_flush(CPU(cpu));
- }
+ if (env->spr[SPR_SDR1] != val) {
+ ppc_store_sdr1(env, val);
+ tlb_flush(CPU(cpu));
}
}
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 29bace6..03ae3c1 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -304,9 +304,9 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong mask = ppc_hash32_hpt_mask(cpu);
- return (hash * HASH_PTEG_SIZE_32) & env->htab_mask;
+ return (hash * HASH_PTEG_SIZE_32) & mask;
}
static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
@@ -339,7 +339,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
target_ulong sr, target_ulong eaddr,
ppc_hash_pte32_t *pte)
{
- CPUPPCState *env = &cpu->env;
hwaddr pteg_off, pte_offset;
hwaddr hash;
uint32_t vsid, pgidx, ptem;
@@ -353,21 +352,22 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
" htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=%" PRIx32 " ptem=%" PRIx32
" hash=" TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, vsid, ptem, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu),
+ vsid, ptem, hash);
pteg_off = get_pteg_offset32(cpu, hash);
pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
if (pte_offset == -1) {
/* Secondary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=%" PRIx32 " api=%" PRIx32
- " hash=" TARGET_FMT_plx "\n", env->htab_base,
- env->htab_mask, vsid, ptem, ~hash);
+ " hash=" TARGET_FMT_plx "\n", ppc_hash32_hpt_base(cpu),
+ ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash);
pteg_off = get_pteg_offset32(cpu, ~hash);
pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
}
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 5b9fb08..898021f 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -44,6 +44,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
/*
* Hash page table definitions
*/
+#define SDR_32_HTABORG 0xFFFF0000UL
+#define SDR_32_HTABMASK 0x000001FFUL
#define HPTES_PER_GROUP 8
#define HASH_PTE_SIZE_32 8
@@ -65,42 +67,46 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
#define HPTE32_R_WIMG 0x00000078
#define HPTE32_R_PP 0x00000003
+static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu)
+{
+ return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG;
+}
+
+static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
+{
+ return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
+}
+
static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
hwaddr pte_offset)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- return ldl_phys(CPU(cpu)->as, env->htab_base + pte_offset);
+ return ldl_phys(CPU(cpu)->as, base + pte_offset);
}
static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
hwaddr pte_offset)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- return ldl_phys(CPU(cpu)->as,
- env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2);
+ return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
}
static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
hwaddr pte_offset, target_ulong pte0)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- stl_phys(CPU(cpu)->as, env->htab_base + pte_offset, pte0);
+ stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
}
static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
hwaddr pte_offset, target_ulong pte1)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- stl_phys(CPU(cpu)->as,
- env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
+ stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
}
typedef struct {
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 76669ed..d44f2bb 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -27,6 +27,7 @@
#include "kvm_ppc.h"
#include "mmu-hash64.h"
#include "exec/log.h"
+#include "hw/hw.h"
//#define DEBUG_SLB
@@ -37,12 +38,6 @@
#endif
/*
- * Used to indicate that a CPU has its hash page table (HPT) managed
- * within the host kernel
- */
-#define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1)
-
-/*
* SLB handling
*/
@@ -294,55 +289,6 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
return rt;
}
-/*
- * 64-bit hash table MMU handling
- */
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
- Error **errp)
-{
- CPUPPCState *env = &cpu->env;
- target_ulong htabsize = value & SDR_64_HTABSIZE;
-
- env->spr[SPR_SDR1] = value;
- if (htabsize > 28) {
- error_setg(errp,
- "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
- htabsize);
- htabsize = 28;
- }
- env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
- env->htab_base = value & SDR_64_HTABORG;
-}
-
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
- Error **errp)
-{
- CPUPPCState *env = &cpu->env;
- Error *local_err = NULL;
-
- if (hpt) {
- env->external_htab = hpt;
- } else {
- env->external_htab = MMU_HASH64_KVM_MANAGED_HPT;
- }
- ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18),
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
- /* Not strictly necessary, but makes it clearer that an external
- * htab is in use when debugging */
- env->htab_base = -1;
-
- if (kvm_enabled()) {
- if (kvmppc_put_books_sregs(cpu) < 0) {
- error_setg(errp, "Unable to update SDR1 in KVM");
- }
- }
-}
-
static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
ppc_slb_t *slb, ppc_hash_pte64_t pte)
{
@@ -431,34 +377,43 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte)
return prot;
}
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index)
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+ hwaddr ptex, int n)
{
- uint64_t token = 0;
- hwaddr pte_offset;
+ hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+ hwaddr base = ppc_hash64_hpt_base(cpu);
+ hwaddr plen = n * HASH_PTE_SIZE_64;
+ const ppc_hash_pte64_t *hptes;
+
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ return vhc->map_hptes(cpu->vhyp, ptex, n);
+ }
- pte_offset = pte_index * HASH_PTE_SIZE_64;
- if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- /*
- * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
- */
- token = kvmppc_hash64_read_pteg(cpu, pte_index);
- } else if (cpu->env.external_htab) {
- /*
- * HTAB is controlled by QEMU. Just point to the internally
- * accessible PTEG.
- */
- token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset;
- } else if (cpu->env.htab_base) {
- token = cpu->env.htab_base + pte_offset;
+ if (!base) {
+ return NULL;
}
- return token;
+
+ hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false);
+ if (plen < (n * HASH_PTE_SIZE_64)) {
+ hw_error("%s: Unable to map all requested HPTEs\n", __func__);
+ }
+ return hptes;
}
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token)
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
{
- if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- kvmppc_hash64_free_pteg(token);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
+ return;
}
+
+ address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64,
+ false, n * HASH_PTE_SIZE_64);
}
static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps,
@@ -503,20 +458,19 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
target_ulong ptem,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
- CPUPPCState *env = &cpu->env;
int i;
- uint64_t token;
+ const ppc_hash_pte64_t *pteg;
target_ulong pte0, pte1;
- target_ulong pte_index;
+ target_ulong ptex;
- pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
- token = ppc_hash64_start_access(cpu, pte_index);
- if (!token) {
+ ptex = (hash & ppc_hash64_hpt_mask(cpu)) * HPTES_PER_GROUP;
+ pteg = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+ if (!pteg) {
return -1;
}
for (i = 0; i < HPTES_PER_GROUP; i++) {
- pte0 = ppc_hash64_load_hpte0(cpu, token, i);
- pte1 = ppc_hash64_load_hpte1(cpu, token, i);
+ pte0 = ppc_hash64_hpte0(cpu, pteg, i);
+ pte1 = ppc_hash64_hpte1(cpu, pteg, i);
/* This compares V, B, H (secondary) and the AVPN */
if (HPTE64_V_COMPARE(pte0, ptem)) {
@@ -536,11 +490,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
*/
pte->pte0 = pte0;
pte->pte1 = pte1;
- ppc_hash64_stop_access(cpu, token);
- return (pte_index + i) * HASH_PTE_SIZE_64;
+ ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
+ return ptex + i;
}
}
- ppc_hash64_stop_access(cpu, token);
+ ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
/*
* We didn't find a valid entry.
*/
@@ -552,8 +506,7 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
CPUPPCState *env = &cpu->env;
- hwaddr pte_offset;
- hwaddr hash;
+ hwaddr hash, ptex;
uint64_t vsid, epnmask, epn, ptem;
const struct ppc_one_seg_page_size *sps = slb->sps;
@@ -588,29 +541,30 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
qemu_log_mask(CPU_LOG_MMU,
"htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU,
"0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, vsid, ptem, hash);
- pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
+ ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu),
+ vsid, ptem, hash);
+ ptex = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
- if (pte_offset == -1) {
+ if (ptex == -1) {
/* Secondary PTEG lookup */
ptem |= HPTE64_V_SECONDARY;
qemu_log_mask(CPU_LOG_MMU,
"1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
- " hash=" TARGET_FMT_plx "\n", env->htab_base,
- env->htab_mask, vsid, ptem, ~hash);
+ " hash=" TARGET_FMT_plx "\n", ppc_hash64_hpt_base(cpu),
+ ppc_hash64_hpt_mask(cpu), vsid, ptem, ~hash);
- pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
+ ptex = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
}
- return pte_offset;
+ return ptex;
}
unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
@@ -708,7 +662,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
CPUPPCState *env = &cpu->env;
ppc_slb_t *slb;
unsigned apshift;
- hwaddr pte_offset;
+ hwaddr ptex;
ppc_hash_pte64_t pte;
int pp_prot, amr_prot, prot;
uint64_t new_pte1, dsisr;
@@ -792,8 +746,8 @@ skip_slb_search:
}
/* 4. Locate the PTE in the hash table */
- pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
- if (pte_offset == -1) {
+ ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
+ if (ptex == -1) {
dsisr = 0x40000000;
if (rwx == 2) {
ppc_hash64_set_isi(cs, env, dsisr);
@@ -806,7 +760,7 @@ skip_slb_search:
return 1;
}
qemu_log_mask(CPU_LOG_MMU,
- "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
+ "found PTE at index %08" HWADDR_PRIx "\n", ptex);
/* 5. Check access permissions */
@@ -849,8 +803,7 @@ skip_slb_search:
}
if (new_pte1 != pte.pte1) {
- ppc_hash64_store_hpte(cpu, pte_offset / HASH_PTE_SIZE_64,
- pte.pte0, new_pte1);
+ ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1);
}
/* 7. Determine the real address from the PTE */
@@ -867,7 +820,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
{
CPUPPCState *env = &cpu->env;
ppc_slb_t *slb;
- hwaddr pte_offset, raddr;
+ hwaddr ptex, raddr;
ppc_hash_pte64_t pte;
unsigned apshift;
@@ -900,8 +853,8 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
}
}
- pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
- if (pte_offset == -1) {
+ ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
+ if (ptex == -1) {
return -1;
}
@@ -909,30 +862,24 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
& TARGET_PAGE_MASK;
}
-void ppc_hash64_store_hpte(PowerPCCPU *cpu,
- target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1)
{
- CPUPPCState *env = &cpu->env;
+ hwaddr base = ppc_hash64_hpt_base(cpu);
+ hwaddr offset = ptex * HASH_PTE_SIZE_64;
- if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- kvmppc_hash64_write_pte(env, pte_index, pte0, pte1);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1);
return;
}
- pte_index *= HASH_PTE_SIZE_64;
- if (env->external_htab) {
- stq_p(env->external_htab + pte_index, pte0);
- stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
- } else {
- stq_phys(CPU(cpu)->as, env->htab_base + pte_index, pte0);
- stq_phys(CPU(cpu)->as,
- env->htab_base + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
- }
+ stq_phys(CPU(cpu)->as, base + offset, pte0);
+ stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1);
}
-void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
- target_ulong pte_index,
+void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
target_ulong pte0, target_ulong pte1)
{
/*
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 7a0b7fc..54f1e37 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -10,8 +10,8 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
int mmu_idx);
-void ppc_hash64_store_hpte(PowerPCCPU *cpu, target_ulong index,
- target_ulong pte0, target_ulong pte1);
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1);
void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
target_ulong pte_index,
target_ulong pte0, target_ulong pte1);
@@ -56,6 +56,9 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
* Hash page table definitions
*/
+#define SDR_64_HTABORG 0x0FFFFFFFFFFC0000ULL
+#define SDR_64_HTABSIZE 0x000000000000001FULL
+
#define HPTES_PER_GROUP 8
#define HASH_PTE_SIZE_64 16
#define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
@@ -91,45 +94,41 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
#define HPTE64_V_1TB_SEG 0x4000000000000000ULL
#define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
- Error **errp);
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
- Error **errp);
-
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index);
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token);
-
-static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu,
- uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
{
- CPUPPCState *env = &cpu->env;
- uint64_t addr;
-
- addr = token + (index * HASH_PTE_SIZE_64);
- if (env->external_htab) {
- return ldq_p((const void *)(uintptr_t)addr);
- } else {
- return ldq_phys(CPU(cpu)->as, addr);
- }
+ return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG;
}
-static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu,
- uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
{
- CPUPPCState *env = &cpu->env;
- uint64_t addr;
-
- addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2;
- if (env->external_htab) {
- return ldq_p((const void *)(uintptr_t)addr);
- } else {
- return ldq_phys(CPU(cpu)->as, addr);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ return vhc->hpt_mask(cpu->vhyp);
}
+ return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1;
}
-typedef struct {
+struct ppc_hash_pte64 {
uint64_t pte0, pte1;
-} ppc_hash_pte64_t;
+};
+
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+ hwaddr ptex, int n);
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n);
+
+static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu,
+ const ppc_hash_pte64_t *hptes, int i)
+{
+ return ldq_p(&(hptes[i].pte0));
+}
+
+static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu,
+ const ppc_hash_pte64_t *hptes, int i)
+{
+ return ldq_p(&(hptes[i].pte1));
+}
#endif /* CONFIG_USER_ONLY */
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index eb2d482..a1af3d6 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -28,6 +28,7 @@
#include "exec/cpu_ldst.h"
#include "exec/log.h"
#include "helper_regs.h"
+#include "qemu/error-report.h"
//#define DEBUG_MMU
//#define DEBUG_BATS
@@ -466,6 +467,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
target_ulong eaddr, int rw, int type)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
hwaddr hash;
target_ulong vsid;
int ds, pr, target_page_bits;
@@ -503,7 +505,7 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
" htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
ctx->hash[0] = hash;
ctx->hash[1] = ~hash;
@@ -518,9 +520,11 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
uint32_t a0, a1, a2, a3;
qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx
- "\n", env->htab_base, env->htab_mask + 0x80);
- for (curaddr = env->htab_base;
- curaddr < (env->htab_base + env->htab_mask + 0x80);
+ "\n", ppc_hash32_hpt_base(cpu),
+ ppc_hash32_hpt_mask(env) + 0x80);
+ for (curaddr = ppc_hash32_hpt_base(cpu);
+ curaddr < (ppc_hash32_hpt_base(cpu)
+ + ppc_hash32_hpt_mask(cpu) + 0x80);
curaddr += 16) {
a0 = ldl_phys(cs->as, curaddr);
a1 = ldl_phys(cs->as, curaddr + 4);
@@ -1205,12 +1209,13 @@ static void mmu6xx_dump_BATs(FILE *f, fprintf_function cpu_fprintf,
static void mmu6xx_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
CPUPPCState *env)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
ppc6xx_tlb_t *tlb;
target_ulong sr;
int type, way, entry, i;
- cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", env->htab_base);
- cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", env->htab_mask);
+ cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_base(cpu));
+ cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_mask(cpu));
cpu_fprintf(f, "\nSegment registers:\n");
for (i = 0; i < 32; i++) {
@@ -1592,9 +1597,9 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem;
tlb_miss:
env->error_code |= ctx.key << 19;
- env->spr[SPR_HASH1] = env->htab_base +
+ env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +
get_pteg_offset32(cpu, ctx.hash[0]);
- env->spr[SPR_HASH2] = env->htab_base +
+ env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) +
get_pteg_offset32(cpu, ctx.hash[1]);
break;
case POWERPC_MMU_SOFT_74xx:
@@ -1997,26 +2002,28 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
/* Special registers manipulation */
void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
- assert(!env->external_htab);
- env->spr[SPR_SDR1] = value;
+ assert(!cpu->vhyp);
#if defined(TARGET_PPC64)
if (env->mmu_model & POWERPC_MMU_64) {
- PowerPCCPU *cpu = ppc_env_get_cpu(env);
- Error *local_err = NULL;
+ target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
+ target_ulong htabsize = value & SDR_64_HTABSIZE;
- ppc_hash64_set_sdr1(cpu, value, &local_err);
- if (local_err) {
- error_report_err(local_err);
- error_free(local_err);
+ if (value & ~sdr_mask) {
+ error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1",
+ value & ~sdr_mask);
+ value &= sdr_mask;
+ }
+ if (htabsize > 28) {
+ error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+ htabsize);
+ return;
}
- } else
-#endif /* defined(TARGET_PPC64) */
- {
- /* FIXME: Should check for valid HTABMASK values */
- env->htab_mask = ((value & SDR_32_HTABMASK) << 16) | 0xFFFF;
- env->htab_base = value & SDR_32_HTABORG;
}
+#endif /* defined(TARGET_PPC64) */
+ /* FIXME: Should check for valid HTABMASK values in 32-bit case */
+ env->spr[SPR_SDR1] = value;
}
/* Segment registers load and store */
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3ba2616..6e6868b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -71,7 +71,7 @@ static TCGv cpu_lr;
#if defined(TARGET_PPC64)
static TCGv cpu_cfar;
#endif
-static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca;
+static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
static TCGv cpu_reserve;
static TCGv cpu_fpscr;
static TCGv_i32 cpu_access_type;
@@ -173,6 +173,10 @@ void ppc_translate_init(void)
offsetof(CPUPPCState, ov), "OV");
cpu_ca = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, ca), "CA");
+ cpu_ov32 = tcg_global_mem_new(cpu_env,
+ offsetof(CPUPPCState, ov32), "OV32");
+ cpu_ca32 = tcg_global_mem_new(cpu_env,
+ offsetof(CPUPPCState, ca32), "CA32");
cpu_reserve = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, reserve_addr),
@@ -806,12 +810,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
}
tcg_temp_free(t0);
if (NARROW_MODE(ctx)) {
- tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
+ tcg_gen_extract_tl(cpu_ov, cpu_ov, 31, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
+ } else {
+ if (is_isa300(ctx)) {
+ tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
+ }
+ tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1);
}
- tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
+static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
+ TCGv res, TCGv arg0, TCGv arg1,
+ int sub)
+{
+ TCGv t0;
+
+ if (!is_isa300(ctx)) {
+ return;
+ }
+
+ t0 = tcg_temp_new();
+ if (sub) {
+ tcg_gen_eqv_tl(t0, arg0, arg1);
+ } else {
+ tcg_gen_xor_tl(t0, arg0, arg1);
+ }
+ tcg_gen_xor_tl(t0, t0, res);
+ tcg_gen_extract_tl(cpu_ca32, t0, 32, 1);
+ tcg_temp_free(t0);
+}
+
/* Common add function */
static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
TCGv arg2, bool add_ca, bool compute_ca,
@@ -838,6 +870,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
tcg_temp_free(t1);
tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+ }
} else {
TCGv zero = tcg_const_tl(0);
if (add_ca) {
@@ -846,6 +881,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
} else {
tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
}
+ gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0);
tcg_temp_free(zero);
}
} else {
@@ -985,6 +1021,9 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
}
if (compute_ov) {
tcg_gen_extu_i32_tl(cpu_ov, t2);
+ if (is_isa300(ctx)) {
+ tcg_gen_extu_i32_tl(cpu_ov32, t2);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
tcg_temp_free_i32(t0);
@@ -1056,6 +1095,9 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
}
if (compute_ov) {
tcg_gen_mov_tl(cpu_ov, t2);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, t2);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
tcg_temp_free_i64(t0);
@@ -1074,10 +1116,10 @@ static void glue(gen_, name)(DisasContext *ctx)
cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \
sign, compute_ov); \
}
-/* divwu divwu. divwuo divwuo. */
+/* divdu divdu. divduo divduo. */
GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0);
GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
-/* divw divw. divwo divwo. */
+/* divd divd. divdo divdo. */
GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
@@ -1249,6 +1291,9 @@ static void gen_mullwo(DisasContext *ctx)
tcg_gen_sari_i32(t0, t0, 31);
tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
tcg_gen_extu_i32_tl(cpu_ov, t0);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
tcg_temp_free_i32(t0);
@@ -1310,6 +1355,9 @@ static void gen_mulldo(DisasContext *ctx)
tcg_gen_sari_i64(t0, t0, 63);
tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
tcg_temp_free_i64(t0);
@@ -1353,17 +1401,22 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
tcg_temp_free(t1);
tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+ }
} else if (add_ca) {
TCGv zero, inv1 = tcg_temp_new();
tcg_gen_not_tl(inv1, arg1);
zero = tcg_const_tl(0);
tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero);
tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero);
+ gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0);
tcg_temp_free(zero);
tcg_temp_free(inv1);
} else {
tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1);
tcg_gen_sub_tl(t0, arg2, arg1);
+ gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1);
}
} else if (add_ca) {
/* Since we're ignoring carry-out, we can simplify the
@@ -1442,7 +1495,10 @@ static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov)
static void gen_neg(DisasContext *ctx)
{
- gen_op_arith_neg(ctx, 0);
+ tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+ if (unlikely(Rc(ctx->opcode))) {
+ gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
+ }
}
static void gen_nego(DisasContext *ctx)
@@ -3703,7 +3759,7 @@ static void gen_tdi(DisasContext *ctx)
/*** Processor control ***/
-static void gen_read_xer(TCGv dst)
+static void gen_read_xer(DisasContext *ctx, TCGv dst)
{
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
@@ -3715,6 +3771,12 @@ static void gen_read_xer(TCGv dst)
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_or_tl(dst, dst, t2);
tcg_gen_or_tl(dst, dst, t0);
+ if (is_isa300(ctx)) {
+ tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32);
+ tcg_gen_or_tl(dst, dst, t0);
+ tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32);
+ tcg_gen_or_tl(dst, dst, t0);
+ }
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
@@ -3722,14 +3784,16 @@ static void gen_read_xer(TCGv dst)
static void gen_write_xer(TCGv src)
{
+ /* Write all flags, while reading back check for isa300 */
tcg_gen_andi_tl(cpu_xer, src,
- ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)));
- tcg_gen_shri_tl(cpu_so, src, XER_SO);
- tcg_gen_shri_tl(cpu_ov, src, XER_OV);
- tcg_gen_shri_tl(cpu_ca, src, XER_CA);
- tcg_gen_andi_tl(cpu_so, cpu_so, 1);
- tcg_gen_andi_tl(cpu_ov, cpu_ov, 1);
- tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ ~((1u << XER_SO) |
+ (1u << XER_OV) | (1u << XER_OV32) |
+ (1u << XER_CA) | (1u << XER_CA32)));
+ tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1);
+ tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1);
+ tcg_gen_extract_tl(cpu_so, src, XER_SO, 1);
+ tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1);
+ tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
}
/* mcrxr */
@@ -3755,6 +3819,28 @@ static void gen_mcrxr(DisasContext *ctx)
tcg_gen_movi_tl(cpu_ca, 0);
}
+#ifdef TARGET_PPC64
+/* mcrxrx */
+static void gen_mcrxrx(DisasContext *ctx)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
+
+ /* copy OV and OV32 */
+ tcg_gen_shli_tl(t0, cpu_ov, 1);
+ tcg_gen_or_tl(t0, t0, cpu_ov32);
+ tcg_gen_shli_tl(t0, t0, 2);
+ /* copy CA and CA32 */
+ tcg_gen_shli_tl(t1, cpu_ca, 1);
+ tcg_gen_or_tl(t1, t1, cpu_ca32);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_trunc_tl_i32(dst, t0);
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+}
+#endif
+
/* mfcr mfocrf */
static void gen_mfcr(DisasContext *ctx)
{
@@ -6424,6 +6510,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC),
#if defined(TARGET_PPC64)
GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(mcrxrx, 0x1F, 0x00, 0x12, 0x007FF801, PPC_NONE, PPC2_ISA300),
#endif
GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC),
diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index be35cbd..37f74be 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c
@@ -107,7 +107,7 @@ static void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
/* XER */
static void spr_read_xer (DisasContext *ctx, int gprn, int sprn)
{
- gen_read_xer(cpu_gpr[gprn]);
+ gen_read_xer(ctx, cpu_gpr[gprn]);
}
static void spr_write_xer (DisasContext *ctx, int sprn, int gprn)
@@ -740,10 +740,22 @@ static void gen_spr_ne_601 (CPUPPCState *env)
&spr_read_decr, &spr_write_decr,
0x00000000);
/* Memory management */
- spr_register(env, SPR_SDR1, "SDR1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_sdr1,
- 0x00000000);
+#ifndef CONFIG_USER_ONLY
+ if (env->has_hv_mode) {
+ /* SDR1 is a hypervisor resource on CPUs which have a
+ * hypervisor mode */
+ spr_register_hv(env, SPR_SDR1, "SDR1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_sdr1,
+ 0x00000000);
+ } else {
+ spr_register(env, SPR_SDR1, "SDR1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_sdr1,
+ 0x00000000);
+ }
+#endif
}
/* BATs 0-3 */
@@ -8835,18 +8847,14 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
}
#if !defined(CONFIG_USER_ONLY)
-
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
-{
- cpu->vhyp = vhyp;
-}
-
-void cpu_ppc_set_papr(PowerPCCPU *cpu)
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
{
CPUPPCState *env = &cpu->env;
ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
ppc_spr_t *amor = &env->spr_cb[SPR_AMOR];
+ cpu->vhyp = vhyp;
+
/* PAPR always has exception vectors in RAM not ROM. To ensure this,
* MSR[IP] should never be set.
*
@@ -10489,11 +10497,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
#else
cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug;
cc->vmsd = &vmstate_ppc_cpu;
-#if defined(TARGET_PPC64)
- cc->write_elf64_note = ppc64_cpu_write_elf64_note;
-#endif
#endif
cc->cpu_exec_enter = ppc_cpu_exec_enter;
+#if defined(CONFIG_SOFTMMU)
+ cc->write_elf64_note = ppc64_cpu_write_elf64_note;
+ cc->write_elf32_note = ppc32_cpu_write_elf32_note;
+#endif
cc->gdb_num_core_regs = 71;
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 6d227a5..290de6d 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
}
}
-static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
{
intptr_t offset;
@@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
}
}
-static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 5404805..d54018d 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -144,7 +144,7 @@ static void free_test_data(test_data *data)
g_free(temp->asl_file);
}
- g_array_free(data->tables, false);
+ g_array_free(data->tables, true);
}
static uint8_t acpi_checksum(const uint8_t *data, int len)
diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker
new file mode 100644
index 0000000..bbb21ed
--- /dev/null
+++ b/tests/docker/dockerfiles/debian-s390x-cross.docker
@@ -0,0 +1,22 @@
+#
+# Docker s390 cross-compiler target
+#
+# This docker target is based on stretch (testing) as the stable build
+# doesn't have the cross compiler available.
+#
+FROM debian:testing-slim
+
+# Duplicate deb line as deb-src
+RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list
+
+# Add the s390x architecture
+RUN dpkg --add-architecture s390x
+
+# Grab the updated list of packages
+RUN apt update
+RUN apt dist-upgrade -yy
+RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install
+RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch
+
+# Specify the cross prefix for this image (see tests/docker/common.rc)
+ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu-
diff --git a/tests/e1000-test.c b/tests/e1000-test.c
index 59cab68..0c5fcdc 100644
--- a/tests/e1000-test.c
+++ b/tests/e1000-test.c
@@ -44,6 +44,7 @@ int main(int argc, char **argv)
path = g_strdup_printf("e1000/%s", models[i]);
qtest_add_data_func(path, models[i], test_device);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c
index 8c42ca9..c612dc6 100644
--- a/tests/e1000e-test.c
+++ b/tests/e1000e-test.c
@@ -99,7 +99,10 @@ static QPCIBus *test_bus;
static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data)
{
- *(QPCIDevice **) data = dev;
+ QPCIDevice **res = data;
+
+ g_assert_null(*res);
+ *res = dev;
}
static QPCIDevice *e1000e_device_find(QPCIBus *bus)
@@ -403,6 +406,7 @@ static void data_test_clear(e1000e_device *d)
e1000e_device_clear(test_bus, d);
close(test_sockets[0]);
pc_alloc_uninit(test_alloc);
+ g_free(d->pci_dev);
qpci_free_pc(test_bus);
qtest_end();
}
diff --git a/tests/eepro100-test.c b/tests/eepro100-test.c
index ed23258..bdc8a67 100644
--- a/tests/eepro100-test.c
+++ b/tests/eepro100-test.c
@@ -54,6 +54,7 @@ int main(int argc, char **argv)
path = g_strdup_printf("eepro100/%s", models[i]);
qtest_add_data_func(path, models[i], test_device);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/endianness-test.c b/tests/endianness-test.c
index cf8d41b..ed0bf52 100644
--- a/tests/endianness-test.c
+++ b/tests/endianness-test.c
@@ -295,14 +295,17 @@ int main(int argc, char **argv)
path = g_strdup_printf("endianness/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness);
+ g_free(path);
path = g_strdup_printf("endianness/split/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness_split);
+ g_free(path);
path = g_strdup_printf("endianness/combine/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness_combine);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 6176e81..24870b3 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -19,6 +19,8 @@
#include "qemu-common.h"
#include "libqtest.h"
+#define ARGV_SIZE 256
+
static char *create_test_img(int secs)
{
char *template = strdup("/tmp/qtest.XXXXXX");
@@ -66,7 +68,7 @@ static const CHST hd_chst[backend_last][mbr_last] = {
},
};
-static const char *img_file_name[backend_last];
+static char *img_file_name[backend_last];
static const CHST *cur_ide[4];
@@ -234,28 +236,36 @@ static int setup_ide(int argc, char *argv[], int argv_sz,
*/
static void test_ide_none(void)
{
- char *argv[256];
-
- setup_common(argv, ARRAY_SIZE(argv));
- qtest_start(g_strjoinv(" ", argv));
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
+
+ setup_common(argv, ARGV_SIZE);
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
static void test_ide_mbr(bool use_device, MBRcontents mbr)
{
- char *argv[256];
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
int argc;
Backend i;
const char *dev;
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
for (i = 0; i < backend_last; i++) {
cur_ide[i] = &hd_chst[i][mbr];
dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL;
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv), i, dev, i, mbr, "");
+ argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, "");
}
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -310,12 +320,13 @@ static void test_ide_device_mbr_chs(void)
static void test_ide_drive_user(const char *dev, bool trans)
{
- char *argv[256], *opts;
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args, *opts;
int argc;
int secs = img_secs[backend_small];
const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans };
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d",
dev ?: "",
trans && dev ? "bios-chs-" : "",
@@ -323,11 +334,14 @@ static void test_ide_drive_user(const char *dev, bool trans)
expected_chst.cyls, expected_chst.heads,
expected_chst.secs);
cur_ide[0] = &expected_chst;
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+ argc = setup_ide(argc, argv, ARGV_SIZE,
0, dev ? opts : NULL, backend_small, mbr_chs,
dev ? "" : opts);
g_free(opts);
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -369,18 +383,22 @@ static void test_ide_device_user_chst(void)
*/
static void test_ide_drive_cd_0(void)
{
- char *argv[256];
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
int argc, ide_idx;
Backend i;
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
for (i = 0; i <= backend_empty; i++) {
ide_idx = backend_empty - i;
cur_ide[ide_idx] = &hd_chst[i][mbr_blank];
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+ argc = setup_ide(argc, argv, ARGV_SIZE,
ide_idx, NULL, i, mbr_blank, "");
}
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -418,6 +436,7 @@ int main(int argc, char **argv)
for (i = 0; i < backend_last; i++) {
if (img_file_name[i]) {
unlink(img_file_name[i]);
+ free(img_file_name[i]);
}
}
diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index da2d5a5..e9d05c8 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -134,6 +134,8 @@ static void test_i440fx_defaults(gconstpointer opaque)
/* 3.2.26 */
g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */
+ g_free(dev);
+ qpci_free_pc(bus);
qtest_end();
}
@@ -270,6 +272,9 @@ static void test_i440fx_pam(gconstpointer opaque)
/* Verify the area is not our new mask */
g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
}
+
+ g_free(dev);
+ qpci_free_pc(bus);
qtest_end();
}
diff --git a/tests/ide-test.c b/tests/ide-test.c
index b57c2b1..139ebc0 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -339,6 +339,7 @@ static void test_bmdma_simple_rw(void)
g_assert(memcmp(buf, cmpbuf, len) == 0);
+ free_pci_device(dev);
g_free(buf);
g_free(cmpbuf);
}
@@ -369,6 +370,7 @@ static void test_bmdma_short_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, 0);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_one_sector_short_prdt(void)
@@ -398,6 +400,7 @@ static void test_bmdma_one_sector_short_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, 0);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_long_prdt(void)
@@ -426,6 +429,7 @@ static void test_bmdma_long_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, BM_STS_INTR);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_no_busmaster(void)
@@ -449,6 +453,7 @@ static void test_bmdma_no_busmaster(void)
* in practice. At least we want to be aware of any changes. */
g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_setup(void)
@@ -525,6 +530,7 @@ static void test_identify(void)
assert_bit_set(buf[85], 0x20);
ide_test_quit();
+ free_pci_device(dev);
}
/*
@@ -563,6 +569,7 @@ static void make_dirty(uint8_t device)
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
g_free(buf);
+ free_pci_device(dev);
}
static void test_flush(void)
@@ -609,6 +616,7 @@ static void test_flush(void)
assert_bit_clear(data, BSY | DF | ERR | DRQ);
ide_test_quit();
+ free_pci_device(dev);
}
static void test_retry_flush(const char *machine)
@@ -659,6 +667,7 @@ static void test_retry_flush(const char *machine)
assert_bit_clear(data, BSY | DF | ERR | DRQ);
ide_test_quit();
+ free_pci_device(dev);
}
static void test_flush_nodev(void)
@@ -676,6 +685,7 @@ static void test_flush_nodev(void)
/* Just testing that qemu doesn't crash... */
+ free_pci_device(dev);
ide_test_quit();
}
@@ -742,6 +752,7 @@ static uint8_t ide_wait_clear(uint8_t flag)
while (true) {
data = qpci_io_readb(dev, ide_bar, reg_status);
if (!(data & flag)) {
+ free_pci_device(dev);
return data;
}
if (difftime(time(NULL), st) > 5.0) {
@@ -851,6 +862,7 @@ static void cdrom_pio_impl(int nblocks)
g_free(pattern);
g_free(rx);
test_bmdma_teardown();
+ free_pci_device(dev);
}
static void test_cdrom_pio(void)
diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c
index e84dd68..7e21a9b 100644
--- a/tests/ipmi-bt-test.c
+++ b/tests/ipmi-bt-test.c
@@ -420,6 +420,7 @@ int main(int argc, char **argv)
" -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0"
" -device isa-ipmi-bt,bmc=bmc0", emu_port);
qtest_start(cmdline);
+ g_free(cmdline);
qtest_irq_intercept_in(global_qtest, "ioapic");
qtest_add_func("/ipmi/extern/connect", test_connect);
qtest_add_func("/ipmi/extern/bt_base", test_bt_base);
diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c
index 9cf0b34..178ffc1 100644
--- a/tests/ipmi-kcs-test.c
+++ b/tests/ipmi-kcs-test.c
@@ -279,6 +279,7 @@ int main(int argc, char **argv)
cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0"
" -device isa-ipmi-kcs,bmc=bmc0");
qtest_start(cmdline);
+ g_free(cmdline);
qtest_irq_intercept_in(global_qtest, "ioapic");
qtest_add_func("/ipmi/local/kcs_base", test_kcs_base);
qtest_add_func("/ipmi/local/kcs_abort", test_kcs_abort);
diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c
index 72d7a96..0cdfaec 100644
--- a/tests/libqos/usb.c
+++ b/tests/libqos/usb.c
@@ -24,6 +24,11 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar)
hc->bar = qpci_iomap(hc->dev, bar, NULL);
}
+void uhci_deinit(struct qhc *hc)
+{
+ g_free(hc->dev);
+}
+
void uhci_port_test(struct qhc *hc, int port, uint16_t expect)
{
uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port);
@@ -64,4 +69,5 @@ void usb_test_hotplug(const char *hcd_id, const int port,
g_assert(response);
g_assert(qdict_haskey(response, "event"));
g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED"));
+ QDECREF(response);
}
diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h
index 423dcfd..297cfc5 100644
--- a/tests/libqos/usb.h
+++ b/tests/libqos/usb.h
@@ -11,6 +11,7 @@ struct qhc {
void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc,
uint32_t devfn, int bar);
void uhci_port_test(struct qhc *hc, int port, uint16_t expect);
+void uhci_deinit(struct qhc *hc);
void usb_test_hotplug(const char *bus_name, const int port,
void (*port_check)(void));
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index d4bf841..7ac15c0 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -24,9 +24,17 @@
typedef struct QVirtioPCIForeachData {
void (*func)(QVirtioDevice *d, void *data);
uint16_t device_type;
+ bool has_slot;
+ int slot;
void *user_data;
} QVirtioPCIForeachData;
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev)
+{
+ g_free(dev->pdev);
+ g_free(dev);
+}
+
static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev)
{
QVirtioPCIDevice *vpcidev;
@@ -49,16 +57,18 @@ static void qvirtio_pci_foreach_callback(
QVirtioPCIForeachData *d = data;
QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev);
- if (vpcidev->vdev.device_type == d->device_type) {
+ if (vpcidev->vdev.device_type == d->device_type &&
+ (!d->has_slot || vpcidev->pdev->devfn == d->slot << 3)) {
d->func(&vpcidev->vdev, d->user_data);
} else {
- g_free(vpcidev);
+ qvirtio_pci_device_free(vpcidev);
}
}
static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data)
{
QVirtioPCIDevice **vpcidev = data;
+ assert(!*vpcidev);
*vpcidev = (QVirtioPCIDevice *)d;
}
@@ -284,21 +294,39 @@ const QVirtioBus qvirtio_pci = {
.virtqueue_kick = qvirtio_pci_virtqueue_kick,
};
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+static void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+ bool has_slot, int slot,
void (*func)(QVirtioDevice *d, void *data), void *data)
{
QVirtioPCIForeachData d = { .func = func,
.device_type = device_type,
+ .has_slot = has_slot,
+ .slot = slot,
.user_data = data };
qpci_device_foreach(bus, PCI_VENDOR_ID_REDHAT_QUMRANET, -1,
- qvirtio_pci_foreach_callback, &d);
+ qvirtio_pci_foreach_callback, &d);
}
QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type)
{
QVirtioPCIDevice *dev = NULL;
- qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev);
+
+ qvirtio_pci_foreach(bus, device_type, false, 0,
+ qvirtio_pci_assign_device, &dev);
+
+ dev->vdev.bus = &qvirtio_pci;
+
+ return dev;
+}
+
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+ uint16_t device_type, int slot)
+{
+ QVirtioPCIDevice *dev = NULL;
+
+ qvirtio_pci_foreach(bus, device_type, true, slot,
+ qvirtio_pci_assign_device, &dev);
dev->vdev.bus = &qvirtio_pci;
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 38c54c6..6ef1909 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -31,9 +31,11 @@ typedef struct QVirtQueuePCI {
extern const QVirtioBus qvirtio_pci;
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
- void (*func)(QVirtioDevice *d, void *data), void *data);
QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+ uint16_t device_type, int slot);
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev);
+
void qvirtio_pci_device_enable(QVirtioPCIDevice *d);
void qvirtio_pci_device_disable(QVirtioPCIDevice *d);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index e54354d..3a0e0d6 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -805,17 +805,7 @@ void qtest_add_data_func_full(const char *str, void *data,
GDestroyNotify data_free_func)
{
gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str);
-#if GLIB_CHECK_VERSION(2, 34, 0)
g_test_add_data_func_full(path, data, fn, data_free_func);
-#elif GLIB_CHECK_VERSION(2, 26, 0)
- /* back-compat casts, remove this once we can require new-enough glib */
- g_test_add_vtable(path, 0, data, NULL,
- (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
-#else
- /* back-compat casts, remove this once we can require new-enough glib */
- g_test_add_vtable(path, 0, data, NULL,
- (void (*)(void)) fn, (void (*)(void)) data_free_func);
-#endif
g_free(path);
}
diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index dafe8be..de35a18 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c
@@ -482,7 +482,7 @@ static void test_migrate(void)
usleep(10 * 1000);
} while (dest_byte_a == dest_byte_b);
- qmp("{ 'execute' : 'stop'}");
+ qmp_discard_response("{ 'execute' : 'stop'}");
/* With it stopped, check nothing changes */
qtest_memread(to, start_address, &dest_byte_c, 1);
sleep(1);
diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c
index 21d4ebb..8a1b0a3 100644
--- a/tests/ptimer-test-stubs.c
+++ b/tests/ptimer-test-stubs.c
@@ -108,6 +108,11 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
return bh;
}
+void qemu_bh_delete(QEMUBH *bh)
+{
+ g_free(bh);
+}
+
void replay_bh_schedule_event(QEMUBH *bh)
{
bh->cb(bh->opaque);
diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c
index b36a476..5d1a2a8 100644
--- a/tests/ptimer-test.c
+++ b/tests/ptimer-test.c
@@ -73,6 +73,7 @@ static void check_set_count(gconstpointer arg)
ptimer_set_count(ptimer, 1000);
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_set_limit(gconstpointer arg)
@@ -92,6 +93,7 @@ static void check_set_limit(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000);
g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_oneshot(gconstpointer arg)
@@ -194,6 +196,7 @@ static void check_oneshot(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_periodic(gconstpointer arg)
@@ -360,6 +363,7 @@ static void check_periodic(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==,
(no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0));
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_mode_change(gconstpointer arg)
@@ -406,6 +410,7 @@ static void check_on_the_fly_mode_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_period_change(gconstpointer arg)
@@ -438,6 +443,7 @@ static void check_on_the_fly_period_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_freq_change(gconstpointer arg)
@@ -470,6 +476,7 @@ static void check_on_the_fly_freq_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_run_with_period_0(gconstpointer arg)
@@ -487,6 +494,7 @@ static void check_run_with_period_0(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_run_with_delta_0(gconstpointer arg)
@@ -591,6 +599,7 @@ static void check_run_with_delta_0(gconstpointer arg)
g_assert_true(triggered);
ptimer_stop(ptimer);
+ ptimer_free(ptimer);
}
static void check_periodic_with_load_0(gconstpointer arg)
@@ -649,6 +658,7 @@ static void check_periodic_with_load_0(gconstpointer arg)
}
ptimer_stop(ptimer);
+ ptimer_free(ptimer);
}
static void check_oneshot_with_load_0(gconstpointer arg)
@@ -682,14 +692,14 @@ static void check_oneshot_with_load_0(gconstpointer arg)
} else {
g_assert_false(triggered);
}
+
+ ptimer_free(ptimer);
}
static void add_ptimer_tests(uint8_t policy)
{
- uint8_t *ppolicy = g_malloc(1);
- char *policy_name = g_malloc0(256);
-
- *ppolicy = policy;
+ char policy_name[256] = "";
+ char *tmp;
if (policy == PTIMER_POLICY_DEFAULT) {
g_sprintf(policy_name, "default");
@@ -715,49 +725,67 @@ static void add_ptimer_tests(uint8_t policy)
g_strlcat(policy_name, "no_counter_rounddown,", 256);
}
- g_test_add_data_func(
- g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
- ppolicy, check_set_count);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
- ppolicy, check_set_limit);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
- ppolicy, check_oneshot);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
- ppolicy, check_periodic);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_mode_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_period_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_freq_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name),
- ppolicy, check_run_with_period_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name),
- ppolicy, check_run_with_delta_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name),
- ppolicy, check_periodic_with_load_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name),
- ppolicy, check_oneshot_with_load_0);
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
+ g_memdup(&policy, 1), check_set_count, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
+ g_memdup(&policy, 1), check_set_limit, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
+ g_memdup(&policy, 1), check_oneshot, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
+ g_memdup(&policy, 1), check_periodic, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_mode_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_period_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_freq_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/run_with_period_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_run_with_period_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/run_with_delta_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_run_with_delta_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_periodic_with_load_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_oneshot_with_load_0, g_free);
+ g_free(tmp);
}
static void add_all_ptimer_policies_comb_tests(void)
diff --git a/tests/pvpanic-test.c b/tests/pvpanic-test.c
index 3bfa678..71ebb5c 100644
--- a/tests/pvpanic-test.c
+++ b/tests/pvpanic-test.c
@@ -27,6 +27,7 @@ static void test_panic(void)
data = qdict_get_qdict(response, "data");
g_assert(qdict_haskey(data, "action"));
g_assert_cmpstr(qdict_get_str(data, "action"), ==, "pause");
+ QDECREF(response);
}
int main(int argc, char **argv)
diff --git a/tests/q35-test.c b/tests/q35-test.c
index 763fe3d..cc58f3e 100644
--- a/tests/q35-test.c
+++ b/tests/q35-test.c
@@ -71,6 +71,9 @@ static void test_smram_lock(void)
g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false);
smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true);
g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true);
+
+ g_free(pcidev);
+ qpci_free_pc(pcibus);
}
int main(int argc, char **argv)
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 4673b67..34e66db 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024
qemu-img: Image size must be less than 8 EiB!
qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1024' is out of range for parameter 'size'
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k
qemu-img: Image size must be less than 8 EiB!
qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1k' is out of range for parameter 'size'
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte
@@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes
qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2
-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
+qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar
qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for
qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a size
-You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes.
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
== Check correct interpretation of suffixes for cluster size ==
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index e206ad6..c6f4eef 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -179,7 +179,7 @@ qququiquit
Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: Can't use a read-only drive
+(qemu) QEMU_PROG: Block node is read-only
QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on
@@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only
QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only
QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk
diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 1d3fd04..aafcd24 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
if iotests.qemu_default_machine == 'pc':
self.vm.add_drive(None, 'media=cdrom', 'ide')
self.vm.launch()
@@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
self.vm.launch()
def tearDown(self):
@@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
if iotests.qemu_default_machine == 'pc':
self.vm.add_drive(None, 'media=cdrom', 'ide')
self.vm.launch()
@@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase):
qemu_img('create', '-f', fmt, blockdev_target_img,
str(TestDriveCompression.image_len), *args)
- self.vm.add_drive(blockdev_target_img, format=fmt)
+ self.vm.add_drive(blockdev_target_img, format=fmt, interface="none")
self.vm.launch()
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 08e4bb7..182acb4 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
=== Invalid command - snapshot node used as backing hd ===
-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}}
+{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}}
=== Invalid command - snapshot node has a backing image ===
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
index 3ba79f0..6d8f0a1 100755
--- a/tests/qemu-iotests/141
+++ b/tests/qemu-iotests/141
@@ -67,7 +67,7 @@ test_blockjob()
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'x-blockdev-del',
'arguments': {'node-name': 'drv0'}}" \
- 'error'
+ 'error' | _filter_generated_node_ids
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'block-job-cancel',
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 195ca1a..82e763b 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
{"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
{"return": {}}
@@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 6b7edaf..54b5329 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -28,6 +28,7 @@ Testing:
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2
@@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
@@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
@@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1
@@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
@@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
@@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
@@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
@@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
@@ -964,6 +1009,7 @@ Testing: -device floppy
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -device floppy,drive-type=120
@@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "120"
Testing: -device floppy,drive-type=144
@@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -device floppy,drive-type=288
@@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "120"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288
@@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512
@@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096
diff --git a/tests/tco-test.c b/tests/tco-test.c
index ef02ec5..c4c264e 100644
--- a/tests/tco-test.c
+++ b/tests/tco-test.c
@@ -42,11 +42,18 @@ typedef struct {
bool noreboot;
QPCIDevice *dev;
QPCIBar tco_io_bar;
+ QPCIBus *bus;
} TestData;
+static void test_end(TestData *d)
+{
+ g_free(d->dev);
+ qpci_free_pc(d->bus);
+ qtest_end();
+}
+
static void test_init(TestData *d)
{
- QPCIBus *bus;
QTestState *qs;
char *s;
@@ -57,8 +64,8 @@ static void test_init(TestData *d)
qtest_irq_intercept_in(qs, "ioapic");
g_free(s);
- bus = qpci_init_pc(NULL);
- d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00));
+ d->bus = qpci_init_pc(NULL);
+ d->dev = qpci_device_find(d->bus, QPCI_DEVFN(0x1f, 0x00));
g_assert(d->dev != NULL);
qpci_device_enable(d->dev);
@@ -148,7 +155,7 @@ static void test_tco_defaults(void)
SW_IRQ_GEN_DEFAULT);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==,
TCO_TMR_DEFAULT);
- qtest_end();
+ test_end(&d);
}
static void test_tco_timeout(void)
@@ -192,7 +199,7 @@ static void test_tco_timeout(void)
g_assert(ret == 1);
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static void test_tco_max_timeout(void)
@@ -225,7 +232,7 @@ static void test_tco_max_timeout(void)
g_assert(ret == 1);
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static QDict *get_watchdog_action(void)
@@ -262,7 +269,7 @@ static void test_tco_second_timeout_pause(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_reset(void)
@@ -287,7 +294,7 @@ static void test_tco_second_timeout_reset(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_shutdown(void)
@@ -312,7 +319,7 @@ static void test_tco_second_timeout_shutdown(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_none(void)
@@ -337,7 +344,7 @@ static void test_tco_second_timeout_none(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_ticks_counter(void)
@@ -365,7 +372,7 @@ static void test_tco_ticks_counter(void)
} while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT));
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static void test_tco1_control_bits(void)
@@ -383,7 +390,7 @@ static void test_tco1_control_bits(void)
qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==,
TCO_LOCK);
- qtest_end();
+ test_end(&d);
}
static void test_tco1_status_bits(void)
@@ -412,7 +419,7 @@ static void test_tco1_status_bits(void)
g_assert(ret == 1);
qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0);
- qtest_end();
+ test_end(&d);
}
static void test_tco2_status_bits(void)
@@ -439,7 +446,7 @@ static void test_tco2_status_bits(void)
g_assert(ret == 1);
qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0);
- qtest_end();
+ test_end(&d);
}
int main(int argc, char **argv)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index f6dfd08..4ccbda1 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations,
g_assert_nonnull(bs);
snprintf(job_id, sizeof(job_id), "job%u", counter++);
- s = block_job_create(job_id, &test_block_job_driver, bs, 0,
- BLOCK_JOB_DEFAULT, test_block_job_cb,
- data, &error_abort);
+ s = block_job_create(job_id, &test_block_job_driver, bs,
+ 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT,
+ test_block_job_cb, data, &error_abort);
s->iterations = iterations;
s->use_timer = use_timer;
s->rc = rc;
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 068c9e4..740e740 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
BlockJob *job;
Error *errp = NULL;
- job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0,
- BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp);
+ job = block_job_create(id, &test_block_job_driver, blk_bs(blk),
+ 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb,
+ NULL, &errp);
if (should_succeed) {
g_assert_null(errp);
g_assert_nonnull(job);
@@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
* BlockDriverState inserted. */
static BlockBackend *create_blk(const char *name)
{
- BlockBackend *blk = blk_new();
+ /* No I/O is performed on this device */
+ BlockBackend *blk = blk_new(0, BLK_PERM_ALL);
BlockDriverState *bs;
bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort);
g_assert_nonnull(bs);
- blk_insert_bs(blk, bs);
+ blk_insert_bs(blk, bs, &error_abort);
bdrv_unref(bs);
if (name) {
diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c
index ffaaffa..9f84402 100644
--- a/tests/test-filter-mirror.c
+++ b/tests/test-filter-mirror.c
@@ -57,7 +57,7 @@ static void test_mirror(void)
};
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf));
g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
close(send_sock[0]);
diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c
index c63b68f..0c4b8d5 100644
--- a/tests/test-filter-redirector.c
+++ b/tests/test-filter-redirector.c
@@ -99,7 +99,7 @@ static void test_redirector_tx(void)
g_assert_cmpint(recv_sock, !=, -1);
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
struct iovec iov[] = {
{
@@ -184,7 +184,7 @@ static void test_redirector_rx(void)
send_sock = unix_connect(sock_path1, NULL);
g_assert_cmpint(send_sock, !=, -1);
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf));
g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
diff --git a/tests/test-io-channel-command.c b/tests/test-io-channel-command.c
index 1d1f461..46ce1ff 100644
--- a/tests/test-io-channel-command.c
+++ b/tests/test-io-channel-command.c
@@ -29,8 +29,8 @@ static void test_io_channel_command_fifo(bool async)
#define TEST_FIFO "tests/test-io-channel-command.fifo"
QIOChannel *src, *dst;
QIOChannelTest *test;
- char *srcfifo = g_strdup_printf("PIPE:%s,wronly", TEST_FIFO);
- char *dstfifo = g_strdup_printf("PIPE:%s,rdonly", TEST_FIFO);
+ const char *srcfifo = "PIPE:" TEST_FIFO ",wronly";
+ const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly";
const char *srcargv[] = {
"/bin/socat", "-", srcfifo, NULL,
};
@@ -59,8 +59,6 @@ static void test_io_channel_command_fifo(bool async)
object_unref(OBJECT(src));
object_unref(OBJECT(dst));
- g_free(srcfifo);
- g_free(dstfifo);
unlink(TEST_FIFO);
}
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 363b59a..bd7c501 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -593,9 +593,10 @@ static void test_groups(void)
BlockBackend *blk1, *blk2, *blk3;
BlockBackendPublic *blkp1, *blkp2, *blkp3;
- blk1 = blk_new();
- blk2 = blk_new();
- blk3 = blk_new();
+ /* No actual I/O is performed on these devices */
+ blk1 = blk_new(0, BLK_PERM_ALL);
+ blk2 = blk_new(0, BLK_PERM_ALL);
+ blk3 = blk_new(0, BLK_PERM_ALL);
blkp1 = blk_get_public(blk1);
blkp2 = blk_get_public(blk2);
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 39f338a..f694a89 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = {
}
};
+/* test array migration */
+
#define AR_SIZE 4
typedef struct {
@@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = {
VMSTATE_END_OF_LIST()
}
};
+
+static uint8_t wire_arr_ptr_no0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
static void test_arr_ptr_str_no0_save(void)
{
TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
save_vmstate(&vmsd_arps, &sample);
- compare_vmstate(wire_sample, sizeof(wire_sample));
+ compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
}
static void test_arr_ptr_str_no0_load(void)
@@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void)
TestStructTriv ar[AR_SIZE] = {};
TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
int idx;
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
- save_buffer(wire_sample, sizeof(wire_sample));
+ save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
+ SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
+ wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
+ }
+}
+
+static uint8_t wire_arr_ptr_0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ VMS_NULLPTR_MARKER,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
+static void test_arr_ptr_str_0_save(void)
+{
+ TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
+ TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arps, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_str_0_load(void)
+{
+ TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} };
+ TestStructTriv ar[AR_SIZE] = {};
+ TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
- wire_sample, sizeof(wire_sample)));
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
for (idx = 0; idx < AR_SIZE; ++idx) {
/* compare the target array ar with the ground truth array ar_gt */
g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
}
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ if (idx == 1) {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0);
+ } else {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0);
+ }
+ }
+}
+
+typedef struct TestArrayOfPtrToInt {
+ int32_t *ar[AR_SIZE];
+} TestArrayOfPtrToInt;
+
+const VMStateDescription vmsd_arpp = {
+ .name = "test/arps",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt,
+ AR_SIZE, 0, vmstate_info_int32, int32_t*),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void test_arr_ptr_prim_0_save(void)
+{
+ int32_t ar[AR_SIZE] = {0 , 1, 2, 3};
+ TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arpp, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_prim_0_load(void)
+{
+ int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3};
+ int32_t ar[AR_SIZE] = {3 , 42, 1, 0};
+ TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+ SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1,
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ if (idx == 1) {
+ g_assert_cmpint(42, ==, ar[idx]);
+ } else {
+ g_assert_cmpint(ar_gt[idx], ==, ar[idx]);
+ }
+ }
}
/* test QTAILQ migration */
@@ -781,6 +862,13 @@ int main(int argc, char **argv)
test_arr_ptr_str_no0_save);
g_test_add_func("/vmstate/array/ptr/str/no0/load",
test_arr_ptr_str_no0_load);
+ g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save);
+ g_test_add_func("/vmstate/array/ptr/str/0/load",
+ test_arr_ptr_str_0_load);
+ g_test_add_func("/vmstate/array/ptr/prim/0/save",
+ test_arr_ptr_prim_0_save);
+ g_test_add_func("/vmstate/array/ptr/prim/0/load",
+ test_arr_ptr_prim_0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c
index 57af8a0..944eb1c 100644
--- a/tests/usb-hcd-ehci-test.c
+++ b/tests/usb-hcd-ehci-test.c
@@ -50,11 +50,8 @@ static void ehci_port_test(struct qhc *hc, int port, uint32_t expect)
/* tests */
-static void pci_init(void)
+static void test_init(void)
{
- if (pcibus) {
- return;
- }
pcibus = qpci_init_pc(NULL);
g_assert(pcibus != NULL);
@@ -64,6 +61,15 @@ static void pci_init(void)
qusb_pci_init_one(pcibus, &ehci1, QPCI_DEVFN(0x1d, 7), 0);
}
+static void test_deinit(void)
+{
+ uhci_deinit(&uhci1);
+ uhci_deinit(&uhci2);
+ uhci_deinit(&uhci3);
+ uhci_deinit(&ehci1);
+ qpci_free_pc(pcibus);
+}
+
static void pci_uhci_port_1(void)
{
g_assert(pcibus != NULL);
@@ -142,7 +148,7 @@ int main(int argc, char **argv)
int ret;
g_test_init(&argc, &argv, NULL);
- qtest_add_func("/ehci/pci/init", pci_init);
+
qtest_add_func("/ehci/pci/uhci-port-1", pci_uhci_port_1);
qtest_add_func("/ehci/pci/ehci-port-1", pci_ehci_port_1);
qtest_add_func("/ehci/pci/ehci-config", pci_ehci_config);
@@ -161,7 +167,10 @@ int main(int argc, char **argv)
"-drive if=none,id=usbcdrom,media=cdrom "
"-device usb-tablet,bus=ich9-ehci-1.0,port=1,usb_version=1 "
"-device usb-storage,bus=ich9-ehci-1.0,port=2,drive=usbcdrom ");
+
+ test_init();
ret = g_test_run();
+ test_deinit();
qtest_end();
diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
index e956b9c..f25bae5 100644
--- a/tests/usb-hcd-uhci-test.c
+++ b/tests/usb-hcd-uhci-test.c
@@ -28,6 +28,7 @@ static void test_port(int port)
g_assert(port > 0);
qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4);
uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS);
+ uhci_deinit(&uhci);
}
static void test_port_1(void)
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 2c45c7b..a61896c 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -139,6 +139,7 @@ enum {
};
typedef struct TestServer {
+ QPCIBus *bus;
gchar *socket_path;
gchar *mig_path;
gchar *chr_name;
@@ -160,14 +161,13 @@ static const char *root;
static void init_virtio_dev(TestServer *s)
{
- QPCIBus *bus;
QVirtioPCIDevice *dev;
uint32_t features;
- bus = qpci_init_pc(NULL);
- g_assert_nonnull(bus);
+ s->bus = qpci_init_pc(NULL);
+ g_assert_nonnull(s->bus);
- dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
+ dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
g_assert_nonnull(dev);
qvirtio_pci_device_enable(dev);
@@ -180,6 +180,7 @@ static void init_virtio_dev(TestServer *s)
qvirtio_set_features(&dev->vdev, features);
qvirtio_set_driver_ok(&dev->vdev);
+ qvirtio_pci_device_free(dev);
}
static void wait_for_fds(TestServer *s)
@@ -507,6 +508,8 @@ static gboolean _test_server_free(TestServer *server)
g_free(server->mig_path);
g_free(server->chr_name);
+ qpci_free_pc(server->bus);
+
g_free(server);
return FALSE;
diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c
index 9556291..43a1ad8 100644
--- a/tests/virtio-9p-test.c
+++ b/tests/virtio-9p-test.c
@@ -80,7 +80,7 @@ static void qvirtio_9p_pci_stop(QVirtIO9P *v9p)
{
qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc);
qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev));
- g_free(v9p->dev);
+ qvirtio_pci_device_free((QVirtioPCIDevice *)v9p->dev);
qvirtio_9p_stop(v9p);
}
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 0e32e41..1eee95d 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -108,7 +108,7 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot)
{
QVirtioPCIDevice *dev;
- dev = qvirtio_pci_device_find(bus, VIRTIO_ID_BLOCK);
+ dev = qvirtio_pci_device_find_slot(bus, VIRTIO_ID_BLOCK, slot);
g_assert(dev != NULL);
g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK);
g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN));
@@ -296,7 +296,7 @@ static void pci_basic(void)
/* End test */
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -389,7 +389,7 @@ static void pci_indirect(void)
/* End test */
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -409,15 +409,16 @@ static void pci_config(void)
qvirtio_set_driver_ok(&dev->vdev);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
capacity = qvirtio_config_readq(&dev->vdev, 0);
g_assert_cmpint(capacity, ==, n_size / 512);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -458,8 +459,9 @@ static void pci_msix(void)
qvirtio_set_driver_ok(&dev->vdev);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
@@ -524,7 +526,7 @@ static void pci_msix(void)
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qpci_msix_disable(dev->pdev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -640,7 +642,7 @@ static void pci_idx(void)
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qpci_msix_disable(dev->pdev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -659,7 +661,7 @@ static void pci_hotplug(void)
dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP);
g_assert(dev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
/* unplug secondary disk */
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
@@ -691,8 +693,9 @@ static void mmio_basic(void)
test_basic(&dev->vdev, alloc, vq);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US);
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 69220ef..0eabd56 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -63,7 +63,7 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs)
qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc);
}
qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev));
- g_free(vs->dev);
+ qvirtio_pci_device_free((QVirtioPCIDevice *)vs->dev);
qvirtio_scsi_stop(vs->qs);
g_free(vs);
}
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 419f252..5ce1b5c 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value,
err = qemu_strtosz(value, NULL, &size);
if (err == -ERANGE) {
- error_setg(errp, "Value '%s' is too large for parameter '%s'",
+ error_setg(errp, "Value '%s' is out of range for parameter '%s'",
value, name);
return;
}
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index ff620ec..6cf70b9 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -355,11 +355,6 @@ void timer_deinit(QEMUTimer *ts)
ts->timer_list = NULL;
}
-void timer_free(QEMUTimer *ts)
-{
- g_free(ts);
-}
-
static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
{
QEMUTimer **pt, *t;