From 5dd7a535b71a0f2f8e7af75c5d694174359ce323 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 16 Jun 2015 13:45:07 +0200 Subject: block/iscsi: add support for request timeouts libiscsi starting with 1.15 will properly support timeout of iscsi commands. The default will remain no timeout, but this can be changed via cmdline parameters, e.g.: qemu -iscsi timeout=30 -drive file=iscsi://... If a timeout occurs a reconnect is scheduled and the timed out command will be requeued for processing after a successful reconnect. The required API call iscsi_set_timeout is present since libiscsi 1.10 which was released in October 2013. However, due to some bugs in the libiscsi code the use is not recommended before version 1.15. Please note that this patch bumps the libiscsi requirement to 1.10 to have all function and macros defined. The patch fixes also a off-by-one error in the NOP timeout calculation which was fixed while touching these code parts. Signed-off-by: Peter Lieven Message-id: 1434455107-19328-1-git-send-email-pl@kamp.de Signed-off-by: Stefan Hajnoczi --- block/iscsi.c | 87 ++++++++++++++++++++++++++++++++++++++++++--------------- configure | 6 ++-- qemu-options.hx | 4 +++ 3 files changed, 72 insertions(+), 25 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 49cee4d..a6b8fe2 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -70,6 +70,7 @@ typedef struct IscsiLun { bool dpofua; bool has_write_same; bool force_next_flush; + bool request_timed_out; } IscsiLun; typedef struct IscsiTask { @@ -100,7 +101,8 @@ typedef struct IscsiAIOCB { #endif } IscsiAIOCB; -#define EVENT_INTERVAL 250 +/* libiscsi uses time_t so its enough to process events every second */ +#define EVENT_INTERVAL 1000 #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) @@ -187,13 +189,18 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, iTask->do_retry = 1; goto out; } - /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced - * in libiscsi 1.10.0. Hardcode this value here to avoid - * the need to bump the libiscsi requirement to 1.10.0 */ - if (status == SCSI_STATUS_BUSY || status == 0x28) { + if (status == SCSI_STATUS_BUSY || status == SCSI_STATUS_TIMEOUT || + status == SCSI_STATUS_TASK_SET_FULL) { unsigned retry_time = exp_random(iscsi_retry_times[iTask->retries - 1]); - error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s", + if (status == SCSI_STATUS_TIMEOUT) { + /* make sure the request is rescheduled AFTER the + * reconnect is initiated */ + retry_time = EVENT_INTERVAL * 2; + iTask->iscsilun->request_timed_out = true; + } + error_report("iSCSI Busy/TaskSetFull/TimeOut" + " (retry #%u in %u ms): %s", iTask->retries, retry_time, iscsi_get_error(iscsi)); aio_timer_init(iTask->iscsilun->aio_context, @@ -277,20 +284,26 @@ iscsi_set_events(IscsiLun *iscsilun) iscsilun); iscsilun->events = ev; } - - /* newer versions of libiscsi may return zero events. In this - * case start a timer to ensure we are able to return to service - * once this situation changes. */ - if (!ev) { - timer_mod(iscsilun->event_timer, - qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); - } } -static void iscsi_timed_set_events(void *opaque) +static void iscsi_timed_check_events(void *opaque) { IscsiLun *iscsilun = opaque; + + /* check for timed out requests */ + iscsi_service(iscsilun->iscsi, 0); + + if (iscsilun->request_timed_out) { + iscsilun->request_timed_out = false; + iscsi_reconnect(iscsilun->iscsi); + } + + /* newer versions of libiscsi may return zero events. Ensure we are able + * to return to service once this situation changes. */ iscsi_set_events(iscsilun); + + timer_mod(iscsilun->event_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); } static void @@ -1093,16 +1106,37 @@ static char *parse_initiator_name(const char *target) return iscsi_name; } +static int parse_timeout(const char *target) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *timeout; + + list = qemu_find_opts("iscsi"); + if (list) { + opts = qemu_opts_find(list, target); + if (!opts) { + opts = QTAILQ_FIRST(&list->head); + } + if (opts) { + timeout = qemu_opt_get(opts, "timeout"); + if (timeout) { + return atoi(timeout); + } + } + } + + return 0; +} + static void iscsi_nop_timed_event(void *opaque) { IscsiLun *iscsilun = opaque; - if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) { + if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) { error_report("iSCSI: NOP timeout. Reconnecting..."); - iscsi_reconnect(iscsilun->iscsi); - } - - if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) { + iscsilun->request_timed_out = true; + } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) { error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages."); return; } @@ -1260,10 +1294,13 @@ static void iscsi_attach_aio_context(BlockDriverState *bs, timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); - /* Prepare a timer for a delayed call to iscsi_set_events */ + /* Set up a timer for periodic calls to iscsi_set_events and to + * scan for command timeout */ iscsilun->event_timer = aio_timer_new(iscsilun->aio_context, QEMU_CLOCK_REALTIME, SCALE_MS, - iscsi_timed_set_events, iscsilun); + iscsi_timed_check_events, iscsilun); + timer_mod(iscsilun->event_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); } static void iscsi_modesense_sync(IscsiLun *iscsilun) @@ -1388,6 +1425,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } + iscsi_set_timeout(iscsi, parse_timeout(iscsi_url->target)); + if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) { error_setg(errp, "iSCSI: Failed to connect to LUN : %s", iscsi_get_error(iscsi)); @@ -1736,6 +1775,10 @@ static QemuOptsList qemu_iscsi_opts = { .name = "initiator-name", .type = QEMU_OPT_STRING, .help = "Initiator iqn name to use when connecting", + },{ + .name = "timeout", + .type = QEMU_OPT_NUMBER, + .help = "Request timeout in seconds (default 0 = no timeout)", }, { /* end of list */ } }, diff --git a/configure b/configure index 3063739..08f9a22 100755 --- a/configure +++ b/configure @@ -3618,15 +3618,15 @@ if compile_prog "" "" ; then fi ########################################## -# Do we have libiscsi >= 1.9.0 +# Do we have libiscsi >= 1.10.0 if test "$libiscsi" != "no" ; then - if $pkg_config --atleast-version=1.9.0 libiscsi; then + if $pkg_config --atleast-version=1.10.0 libiscsi; then libiscsi="yes" libiscsi_cflags=$($pkg_config --cflags libiscsi) libiscsi_libs=$($pkg_config --libs libiscsi) else if test "$libiscsi" = "yes" ; then - feature_not_found "libiscsi" "Install libiscsi >= 1.9.0" + feature_not_found "libiscsi" "Install libiscsi >= 1.10.0" fi libiscsi="no" fi diff --git a/qemu-options.hx b/qemu-options.hx index e6e3895..0bdd59f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2294,6 +2294,9 @@ By default qemu will use the iSCSI initiator-name 'iqn.2008-11.org.linux-kvm[:]' but this can also be set from the command line or a configuration file. +Since version Qemu 2.4 it is possible to specify a iSCSI request timeout to detect +stalled requests and force a reestablishment of the session. The timeout +is specified in seconds. The default is 0 which means no timeout. Example (without authentication): @example @@ -2321,6 +2324,7 @@ DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi, "-iscsi [user=user][,password=password]\n" " [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n" " [,initiator-name=initiator-iqn][,id=target-iqn]\n" + " [,timeout=timeout]\n" " iSCSI session parameters\n", QEMU_ARCH_ALL) STEXI -- cgit v1.1 From 3e5feb6202149e8a963a33b911216e40d790f1d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20Makovi=C4=8Dka?= Date: Wed, 24 Jun 2015 07:05:25 +0200 Subject: qcow2: Handle EAGAIN returned from update_refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a crash during image compression Signed-off-by: Jindřich Makovička Tested-by: Richard W.M. Jones Reviewed-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- block/qcow2-refcount.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0632fc3..b0ee42d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -940,19 +940,21 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) } free_in_cluster = s->cluster_size - offset_into_cluster(s, offset); - if (!offset || free_in_cluster < size) { - int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size); - if (new_cluster < 0) { - return new_cluster; - } + do { + if (!offset || free_in_cluster < size) { + int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size); + if (new_cluster < 0) { + return new_cluster; + } - if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) { - offset = new_cluster; + if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) { + offset = new_cluster; + } } - } - assert(offset); - ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); + assert(offset); + ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); + } while (ret == -EAGAIN); if (ret < 0) { return ret; } -- cgit v1.1 From 4b80ab2b7d950d5b22647b364e37eb81c756f061 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 4 Jun 2015 20:20:34 -0400 Subject: qapi: Rename 'dirty-bitmap' mode to 'incremental' If we wish to make differential backups a feature that's easy to access, it might be pertinent to rename the "dirty-bitmap" mode to "incremental" to make it clear what /type/ of backup the dirty-bitmap is helping us perform. This is an API breaking change, but 2.4 has not yet gone live, so we have this flexibility. Signed-off-by: John Snow Message-id: 1433463642-21840-2-git-send-email-jsnow@redhat.com Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block/backup.c | 10 +++++----- block/mirror.c | 4 ++-- docs/bitmaps.md | 8 ++++---- include/block/block_int.h | 2 +- qapi/block-core.json | 8 ++++---- qmp-commands.hx | 6 +++--- tests/qemu-iotests/124 | 6 +++--- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/block/backup.c b/block/backup.c index 4a1af68..d3c7d9f 100644 --- a/block/backup.c +++ b/block/backup.c @@ -38,7 +38,7 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; BlockDriverState *target; - /* bitmap for sync=dirty-bitmap */ + /* bitmap for sync=incremental */ BdrvDirtyBitmap *sync_bitmap; MirrorSyncMode sync_mode; RateLimit limit; @@ -365,7 +365,7 @@ static void coroutine_fn backup_run(void *opaque) qemu_coroutine_yield(); job->common.busy = true; } - } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) { + } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { ret = backup_run_incremental(job); } else { /* Both FULL and TOP SYNC_MODE's require copying.. */ @@ -497,10 +497,10 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } - if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) { + if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { if (!sync_bitmap) { error_setg(errp, "must provide a valid bitmap name for " - "\"dirty-bitmap\" sync mode"); + "\"incremental\" sync mode"); return; } @@ -535,7 +535,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, job->on_target_error = on_target_error; job->target = target; job->sync_mode = sync_mode; - job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ? + job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? sync_bitmap : NULL; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); diff --git a/block/mirror.c b/block/mirror.c index 048e452..49aa8bc 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -710,8 +710,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode; BlockDriverState *base; - if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) { - error_setg(errp, "Sync mode 'dirty-bitmap' not supported"); + if (mode == MIRROR_SYNC_MODE_INCREMENTAL) { + error_setg(errp, "Sync mode 'incremental' not supported"); return; } is_none_mode = mode == MIRROR_SYNC_MODE_NONE; diff --git a/docs/bitmaps.md b/docs/bitmaps.md index f066b48..fa87f07 100644 --- a/docs/bitmaps.md +++ b/docs/bitmaps.md @@ -210,7 +210,7 @@ full backup as a backing image. "bitmap": "bitmap0", "target": "incremental.0.img", "format": "qcow2", - "sync": "dirty-bitmap", + "sync": "incremental", "mode": "existing" } } @@ -235,7 +235,7 @@ full backup as a backing image. "bitmap": "bitmap0", "target": "incremental.1.img", "format": "qcow2", - "sync": "dirty-bitmap", + "sync": "incremental", "mode": "existing" } } @@ -275,7 +275,7 @@ full backup as a backing image. "bitmap": "bitmap0", "target": "incremental.0.img", "format": "qcow2", - "sync": "dirty-bitmap", + "sync": "incremental", "mode": "existing" } } @@ -308,7 +308,7 @@ full backup as a backing image. "bitmap": "bitmap0", "target": "incremental.0.img", "format": "qcow2", - "sync": "dirty-bitmap", + "sync": "incremental", "mode": "existing" } } diff --git a/include/block/block_int.h b/include/block/block_int.h index b0476fc..44a5cf7 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -635,7 +635,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @sync_mode: What parts of the disk image should be copied to the destination. - * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP. + * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. diff --git a/qapi/block-core.json b/qapi/block-core.json index 5a368f6..ed3f890 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -536,12 +536,12 @@ # # @none: only copy data written from now on # -# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.4 +# @incremental: only copy data described by the dirty bitmap. Since: 2.4 # # Since: 1.3 ## { 'enum': 'MirrorSyncMode', - 'data': ['top', 'full', 'none', 'dirty-bitmap'] } + 'data': ['top', 'full', 'none', 'incremental'] } ## # @BlockJobType: @@ -724,8 +724,8 @@ # # @speed: #optional the maximum speed, in bytes per second # -# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap". -# Must be present if sync is "dirty-bitmap", must NOT be present +# @bitmap: #optional the name of dirty bitmap if sync is "incremental". +# Must be present if sync is "incremental", must NOT be present # otherwise. (Since 2.4) # # @on-source-error: #optional the action to take on an error on the source, diff --git a/qmp-commands.hx b/qmp-commands.hx index a05d25f..87fa172 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1137,10 +1137,10 @@ Arguments: (json-string, optional) - "sync": what parts of the disk image should be copied to the destination; possibilities include "full" for all the disk, "top" for only the sectors - allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in + allocated in the topmost image, "incremental" for only the dirty sectors in the bitmap, or "none" to only replicate new I/O (MirrorSyncMode). -- "bitmap": dirty bitmap name for sync==dirty-bitmap. Must be present if sync - is "dirty-bitmap", must NOT be present otherwise. +- "bitmap": dirty bitmap name for sync==incremental. Must be present if sync + is "incremental", must NOT be present otherwise. - "mode": whether and how QEMU should create a new image (NewImageMode, optional, default 'absolute-paths') - "speed": the maximum speed, in bytes per second (json-int, optional) diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124 index 8abce2f..9ccd118 100644 --- a/tests/qemu-iotests/124 +++ b/tests/qemu-iotests/124 @@ -187,7 +187,7 @@ class TestIncrementalBackup(iotests.QMPTestCase): target = self.prepare_backup(bitmap, parent) res = self.do_qmp_backup(device=bitmap.drive['id'], - sync='dirty-bitmap', bitmap=bitmap.name, + sync='incremental', bitmap=bitmap.name, format=bitmap.drive['fmt'], target=target, mode='existing') if not res: @@ -325,7 +325,7 @@ class TestIncrementalBackup(iotests.QMPTestCase): self.assert_no_active_block_jobs() self.files.append(self.err_img) result = self.vm.qmp('drive-backup', device=self.drives[0]['id'], - sync='dirty-bitmap', format=self.drives[0]['fmt'], + sync='incremental', format=self.drives[0]['fmt'], target=self.err_img) self.assert_qmp(result, 'error/class', 'GenericError') @@ -334,7 +334,7 @@ class TestIncrementalBackup(iotests.QMPTestCase): self.assert_no_active_block_jobs() self.files.append(self.err_img) result = self.vm.qmp('drive-backup', device=self.drives[0]['id'], - sync='dirty-bitmap', bitmap='unknown', + sync='incremental', bitmap='unknown', format=self.drives[0]['fmt'], target=self.err_img) self.assert_qmp(result, 'error/class', 'GenericError') -- cgit v1.1 From 126b8bbdfe8bc4042f13f230a4b36f90646f47c6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 May 2015 16:17:09 +0200 Subject: blockdev: no need to drain+flush in hmp_drive_del bdrv_close already does that, and in fact hmp_drive_del would need another drain after the flush (which bdrv_close does). So remove the duplication. Signed-off-by: Paolo Bonzini Reviewed-by: Alberto Garcia Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Message-id: 1432822629-25401-1-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- blockdev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/blockdev.c b/blockdev.c index b354676..4d5e016 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2167,9 +2167,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) return; } - /* quiesce block driver; prevent further io */ - bdrv_drain_all(); - bdrv_flush(bs); bdrv_close(bs); /* if we have a device attached to this BlockDriverState -- cgit v1.1 From 471fae3c98d4f44b1957eb09d51ace440c585a20 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 12 Jun 2015 16:01:29 +0300 Subject: timer: Move NANOSECONDS_PER_SECONDS to timer.h We want to be able to reuse this define by making it common to multiple QEMU modules. This also makes it an integer since there's no need for it to be a float. Signed-off-by: Alberto Garcia Message-id: 6375912849da2ab561046dd013684535ccecca44.1434113783.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi --- include/qemu/throttle.h | 2 -- include/qemu/timer.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index 5af76f0..995b2d5 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -29,8 +29,6 @@ #include "qemu-common.h" #include "qemu/timer.h" -#define NANOSECONDS_PER_SECOND 1000000000.0 - typedef enum { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ, diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9e4f90f..5923d60 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -5,6 +5,8 @@ #include "qemu-common.h" #include "qemu/notify.h" +#define NANOSECONDS_PER_SECOND 1000000000LL + /* timers */ #define SCALE_MS 1000000 -- cgit v1.1 From e0cf11f31c24cfb17f44ed46c254d84c78e7f6e9 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 12 Jun 2015 16:01:30 +0300 Subject: timer: Use a single definition of NSEC_PER_SEC for the whole codebase Signed-off-by: Alberto Garcia Message-id: c6e55468856ba0b8f95913c4da111cc0ef266541.1434113783.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi --- hw/ppc/ppc.c | 2 -- hw/ppc/spapr_rtc.c | 3 +-- hw/timer/mc146818rtc.c | 1 - hw/usb/hcd-ehci.c | 2 +- include/qemu/timer.h | 2 +- tests/rtl8139-test.c | 10 +++++----- tests/test-throttle.c | 8 ++++---- tests/wdt_ib700-test.c | 15 +++++++-------- util/throttle.c | 4 ++-- 9 files changed, 21 insertions(+), 26 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 99db56c..2a4b8e1 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -51,8 +51,6 @@ # define LOG_TB(...) do { } while (0) #endif -#define NSEC_PER_SEC 1000000000LL - static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 83eb7c1..9da3746 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -26,6 +26,7 @@ * */ #include "cpu.h" +#include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/ppc/spapr.h" #include "qapi-event.h" @@ -40,8 +41,6 @@ struct sPAPRRTCState { int64_t ns_offset; }; -#define NSEC_PER_SEC 1000000000LL - void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns) { sPAPRRTCState *rtc = SPAPR_RTC(dev); diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 2e3ffc8..954c34d 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -48,7 +48,6 @@ # define DPRINTF_C(format, ...) do { } while (0) #endif -#define NSEC_PER_SEC 1000000000LL #define SEC_PER_MIN 60 #define MIN_PER_HOUR 60 #define SEC_PER_HOUR 3600 diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index d4d7547..d7cd40b 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -32,7 +32,7 @@ #include "trace.h" #define FRAME_TIMER_FREQ 1000 -#define FRAME_TIMER_NS (1000000000 / FRAME_TIMER_FREQ) +#define FRAME_TIMER_NS (NSEC_PER_SEC / FRAME_TIMER_FREQ) #define UFRAME_TIMER_NS (FRAME_TIMER_NS / 8) #define NB_MAXINTRATE 8 // Max rate at which controller issues ints diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 5923d60..4dda20b 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -5,7 +5,7 @@ #include "qemu-common.h" #include "qemu/notify.h" -#define NANOSECONDS_PER_SECOND 1000000000LL +#define NSEC_PER_SEC 1000000000LL /* timers */ diff --git a/tests/rtl8139-test.c b/tests/rtl8139-test.c index 4e0bf02..3bff0e3 100644 --- a/tests/rtl8139-test.c +++ b/tests/rtl8139-test.c @@ -12,6 +12,7 @@ #include "libqtest.h" #include "libqos/pci-pc.h" #include "qemu/osdep.h" +#include "qemu/timer.h" #include "qemu-common.h" /* Tests only initialization so far. TODO: Replace with functional tests */ @@ -20,7 +21,6 @@ static void nop(void) } #define CLK 33000000 -#define NS_PER_SEC 1000000000ULL static QPCIBus *pcibus; static QPCIDevice *dev; @@ -86,7 +86,7 @@ static void test_timer(void) fatal("time too big %u\n", curr); } for (cnt = 0; ; ) { - clock_step(1 * NS_PER_SEC); + clock_step(1 * NSEC_PER_SEC); prev = curr; curr = in_Timer(); @@ -125,7 +125,7 @@ static void test_timer(void) out_IntrStatus(0x4000); curr = in_Timer(); out_TimerInt(curr + 0.5 * CLK); - clock_step(1 * NS_PER_SEC); + clock_step(1 * NSEC_PER_SEC); out_Timer(0); if ((in_IntrStatus() & 0x4000) == 0) { fatal("we should have an interrupt here!\n"); @@ -137,7 +137,7 @@ static void test_timer(void) out_IntrStatus(0x4000); curr = in_Timer(); out_TimerInt(curr + 0.5 * CLK); - clock_step(1 * NS_PER_SEC); + clock_step(1 * NSEC_PER_SEC); out_TimerInt(0); if ((in_IntrStatus() & 0x4000) == 0) { fatal("we should have an interrupt here!\n"); @@ -148,7 +148,7 @@ static void test_timer(void) next = curr + 5.0 * CLK; out_TimerInt(next); for (cnt = 0; ; ) { - clock_step(1 * NS_PER_SEC); + clock_step(1 * NSEC_PER_SEC); prev = curr; curr = in_Timer(); diff = (curr-prev) & 0xffffffffu; diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 0168445..33b6b95 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -40,19 +40,19 @@ static void test_leak_bucket(void) bkt.level = 1.5; /* leak an op work of time */ - throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150); g_assert(bkt.avg == 150); g_assert(bkt.max == 15); g_assert(double_cmp(bkt.level, 0.5)); /* leak again emptying the bucket */ - throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150); g_assert(bkt.avg == 150); g_assert(bkt.max == 15); g_assert(double_cmp(bkt.level, 0)); /* check that the bucket level won't go lower */ - throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150); g_assert(bkt.avg == 150); g_assert(bkt.max == 15); g_assert(double_cmp(bkt.level, 0)); @@ -90,7 +90,7 @@ static void test_compute_wait(void) bkt.level = 15.5; wait = throttle_compute_wait(&bkt); /* time required to do half an operation */ - result = (int64_t) NANOSECONDS_PER_SECOND / 150 / 2; + result = (int64_t) NSEC_PER_SEC / 150 / 2; g_assert(wait == result); } diff --git a/tests/wdt_ib700-test.c b/tests/wdt_ib700-test.c index 513a533..10a5472 100644 --- a/tests/wdt_ib700-test.c +++ b/tests/wdt_ib700-test.c @@ -11,8 +11,7 @@ #include #include "libqtest.h" #include "qemu/osdep.h" - -#define NS_PER_SEC 1000000000ULL +#include "qemu/timer.h" static void qmp_check_no_event(void) { @@ -41,29 +40,29 @@ static QDict *qmp_get_event(const char *name) static QDict *ib700_program_and_wait(QTestState *s) { - clock_step(NS_PER_SEC * 40); + clock_step(NSEC_PER_SEC * 40); qmp_check_no_event(); /* 2 second limit */ outb(0x443, 14); /* Ping */ - clock_step(NS_PER_SEC); + clock_step(NSEC_PER_SEC); qmp_check_no_event(); outb(0x443, 14); /* Disable */ - clock_step(NS_PER_SEC); + clock_step(NSEC_PER_SEC); qmp_check_no_event(); outb(0x441, 1); - clock_step(3 * NS_PER_SEC); + clock_step(3 * NSEC_PER_SEC); qmp_check_no_event(); /* Enable and let it fire */ outb(0x443, 13); - clock_step(3 * NS_PER_SEC); + clock_step(3 * NSEC_PER_SEC); qmp_check_no_event(); - clock_step(2 * NS_PER_SEC); + clock_step(2 * NSEC_PER_SEC); return qmp_get_event("WATCHDOG"); } diff --git a/util/throttle.c b/util/throttle.c index 706c131..ec70476 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -36,7 +36,7 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) double leak; /* compute how much to leak */ - leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; + leak = (bkt->avg * (double) delta_ns) / NSEC_PER_SEC; /* make the bucket leak */ bkt->level = MAX(bkt->level - leak, 0); @@ -72,7 +72,7 @@ static void throttle_do_leak(ThrottleState *ts, int64_t now) */ static int64_t throttle_do_compute_wait(double limit, double extra) { - double wait = extra * NANOSECONDS_PER_SECOND; + double wait = extra * NSEC_PER_SEC; wait /= limit; return wait; } -- cgit v1.1 From ba3f0e2545c365ebe1dbddb0e53058710d41881e Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:07 +0800 Subject: block: Add bdrv_get_block_status_above Like bdrv_is_allocated_above, this function follows the backing chain until seeing BDRV_BLOCK_ALLOCATED. Base is not included. Reimplement bdrv_is_allocated on top. [Initialized bdrv_co_get_block_status_above() ret to 0 to silence mingw64 compiler warning about the unitialized variable. assert(bs != base) prevents that case but I suppose the program could be compiled with -DNDEBUG. --Stefan] Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/io.c | 56 +++++++++++++++++++++++++++++++++++++++++---------- include/block/block.h | 4 ++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/block/io.c b/block/io.c index e295992..ccf79c3 100644 --- a/block/io.c +++ b/block/io.c @@ -1531,28 +1531,54 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, return ret; } -/* Coroutine wrapper for bdrv_get_block_status() */ -static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) +static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, + int *pnum) +{ + BlockDriverState *p; + int64_t ret = 0; + + assert(bs != base); + for (p = bs; p != base; p = p->backing_hd) { + ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum); + if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) { + break; + } + /* [sector_num, pnum] unallocated on this layer, which could be only + * the first part of [sector_num, nb_sectors]. */ + nb_sectors = MIN(nb_sectors, *pnum); + } + return ret; +} + +/* Coroutine wrapper for bdrv_get_block_status_above() */ +static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque) { BdrvCoGetBlockStatusData *data = opaque; - BlockDriverState *bs = data->bs; - data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, - data->pnum); + data->ret = bdrv_co_get_block_status_above(data->bs, data->base, + data->sector_num, + data->nb_sectors, + data->pnum); data->done = true; } /* - * Synchronous wrapper around bdrv_co_get_block_status(). + * Synchronous wrapper around bdrv_co_get_block_status_above(). * - * See bdrv_co_get_block_status() for details. + * See bdrv_co_get_block_status_above() for details. */ -int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +int64_t bdrv_get_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum) { Coroutine *co; BdrvCoGetBlockStatusData data = { .bs = bs, + .base = base, .sector_num = sector_num, .nb_sectors = nb_sectors, .pnum = pnum, @@ -1561,11 +1587,11 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ - bdrv_get_block_status_co_entry(&data); + bdrv_get_block_status_above_co_entry(&data); } else { AioContext *aio_context = bdrv_get_aio_context(bs); - co = qemu_coroutine_create(bdrv_get_block_status_co_entry); + co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry); qemu_coroutine_enter(co, &data); while (!data.done) { aio_poll(aio_context, true); @@ -1574,6 +1600,14 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, return data.ret; } +int64_t bdrv_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) +{ + return bdrv_get_block_status_above(bs, bs->backing_hd, + sector_num, nb_sectors, pnum); +} + int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { diff --git a/include/block/block.h b/include/block/block.h index 07bb724..06e4137 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -372,6 +372,10 @@ bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); +int64_t bdrv_get_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, -- cgit v1.1 From 0fc9f8ea2800b76eaea20a8a3a91fbeeb4bfa81b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:08 +0800 Subject: qmp: Add optional bool "unmap" to drive-mirror If specified as "true", it allows discarding on target sectors where source is not allocated. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/mirror.c | 8 ++++++-- blockdev.c | 5 +++++ hmp.c | 2 +- include/block/block_int.h | 2 ++ qapi/block-core.json | 8 +++++++- qmp-commands.hx | 3 +++ 6 files changed, 24 insertions(+), 4 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 49aa8bc..4be06a5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -58,6 +58,7 @@ typedef struct MirrorBlockJob { int in_flight; int sectors_in_flight; int ret; + bool unmap; } MirrorBlockJob; typedef struct MirrorOp { @@ -652,6 +653,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, int64_t buf_size, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + bool unmap, BlockCompletionFunc *cb, void *opaque, Error **errp, const BlockJobDriver *driver, @@ -686,6 +688,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->base = base; s->granularity = granularity; s->buf_size = MAX(buf_size, granularity); + s->unmap = unmap; s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { @@ -704,6 +707,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + bool unmap, BlockCompletionFunc *cb, void *opaque, Error **errp) { @@ -718,7 +722,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL; mirror_start_job(bs, target, replaces, speed, granularity, buf_size, - on_source_error, on_target_error, cb, opaque, errp, + on_source_error, on_target_error, unmap, cb, opaque, errp, &mirror_job_driver, is_none_mode, base); } @@ -766,7 +770,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, bdrv_ref(base); mirror_start_job(bs, base, NULL, speed, 0, 0, - on_error, on_error, cb, opaque, &local_err, + on_error, on_error, false, cb, opaque, &local_err, &commit_active_job_driver, false, base); if (local_err) { error_propagate(errp, local_err); diff --git a/blockdev.c b/blockdev.c index 4d5e016..7fee519 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2655,6 +2655,7 @@ void qmp_drive_mirror(const char *device, const char *target, bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_unmap, bool unmap, Error **errp) { BlockBackend *blk; @@ -2686,6 +2687,9 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_buf_size) { buf_size = DEFAULT_MIRROR_BUF_SIZE; } + if (!has_unmap) { + unmap = true; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -2827,6 +2831,7 @@ void qmp_drive_mirror(const char *device, const char *target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, on_source_error, on_target_error, + unmap, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); diff --git a/hmp.c b/hmp.c index 070aaf8..dcc66f1 100644 --- a/hmp.c +++ b/hmp.c @@ -1061,7 +1061,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) false, NULL, false, NULL, full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, true, mode, false, 0, false, 0, false, 0, - false, 0, false, 0, &err); + false, 0, false, 0, false, true, &err); hmp_handle_error(mon, &err); } diff --git a/include/block/block_int.h b/include/block/block_int.h index 44a5cf7..83ed1a5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -612,6 +612,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * @mode: Whether to collapse all images in the chain to the target. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -626,6 +627,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + bool unmap, BlockCompletionFunc *cb, void *opaque, Error **errp); diff --git a/qapi/block-core.json b/qapi/block-core.json index ed3f890..7b2efb8 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -975,6 +975,11 @@ # @on-target-error: #optional the action to take on an error on the target, # default 'report' (no limitations, since this applies to # a different block device than @device). +# @unmap: #optional Whether to try to unmap target sectors where source has +# only zero. If true, and target unallocated sectors will read as zero, +# target image sectors will be unmapped; otherwise, zeroes will be +# written. Both will result in identical contents. +# Default is true. (Since 2.4) # # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound @@ -987,7 +992,8 @@ 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*unmap': 'bool' } } ## # @BlockDirtyBitmap diff --git a/qmp-commands.hx b/qmp-commands.hx index 87fa172..e1bcc60 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1503,6 +1503,7 @@ EQMP .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," "node-name:s?,replaces:s?," "on-source-error:s?,on-target-error:s?," + "unmap:b?," "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -1542,6 +1543,8 @@ Arguments: (BlockdevOnError, default 'report') - "on-target-error": the action to take on an error on the target (BlockdevOnError, default 'report') +- "unmap": whether the target sectors should be discarded where source has only + zeroes. (json-bool, optional, default true) The default value of the granularity is the image cluster size clamped between 4096 and 65536, if the image format defines one. If the format -- cgit v1.1 From dcfb3beb5130694b76b57de109619fcbf9c7e5b5 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:09 +0800 Subject: mirror: Do zero write on target if sectors not allocated If guest discards a source cluster, mirroring with bdrv_aio_readv is overkill. Some protocols do zero upon discard, where it's best to use bdrv_aio_write_zeroes, otherwise, bdrv_aio_discard will be enough. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/mirror.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 4be06a5..8888cea 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -165,6 +165,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; uint64_t delay_ns = 0; MirrorOp *op; + int pnum; + int64_t ret; s->sector_num = hbitmap_iter_next(&s->hbi); if (s->sector_num < 0) { @@ -291,8 +293,22 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) s->in_flight++; s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); - bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, - mirror_read_complete, op); + + ret = bdrv_get_block_status_above(source, NULL, sector_num, + nb_sectors, &pnum); + if (ret < 0 || pnum < nb_sectors || + (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) { + bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, + mirror_read_complete, op); + } else if (ret & BDRV_BLOCK_ZERO) { + bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors, + s->unmap ? BDRV_REQ_MAY_UNMAP : 0, + mirror_write_complete, op); + } else { + assert(!(ret & BDRV_BLOCK_DATA)); + bdrv_aio_discard(s->target, sector_num, op->nb_sectors, + mirror_write_complete, op); + } return delay_ns; } -- cgit v1.1 From 508249952c0ea7472c62e17bf8132295dab4912d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:10 +0800 Subject: block: Fix dirty bitmap in bdrv_co_discard Unsetting dirty globally with discard is not very correct. The discard may zero out sectors (depending on can_write_zeroes_with_unmap), we should replicate this change to destination side to make sure that the guest sees the same data. Calling bdrv_reset_dirty also troubles mirror job because the hbitmap iterator doesn't expect unsetting of bits after current position. So let's do it the opposite way which fixes both problems: set the dirty bits if we are to discard it. Reported-by: wangxiaolong@ucloud.cn Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block/io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/io.c b/block/io.c index ccf79c3..ad31822 100644 --- a/block/io.c +++ b/block/io.c @@ -2412,8 +2412,6 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return -EPERM; } - bdrv_reset_dirty(bs, sector_num, nb_sectors); - /* Do nothing if disabled. */ if (!(bs->open_flags & BDRV_O_UNMAP)) { return 0; @@ -2423,6 +2421,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } + bdrv_set_dirty(bs, sector_num, nb_sectors); + max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); while (nb_sectors > 0) { int ret; -- cgit v1.1 From 6e82e4bce127654b2dd42ef393587775be792334 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:11 +0800 Subject: block: Remove bdrv_reset_dirty Using this function would always be wrong because a dirty bitmap must have a specific owner that consumes the dirty bits and calls bdrv_reset_dirty_bitmap(). Remove the unused function to avoid future misuse. Reviewed-by: Eric Blake Signed-off-by: Fam Zheng Reviewed-by: John Snow Signed-off-by: Stefan Hajnoczi --- block.c | 12 ------------ include/block/block_int.h | 2 -- 2 files changed, 14 deletions(-) diff --git a/block.c b/block.c index 81233be..7e130cc 100644 --- a/block.c +++ b/block.c @@ -3528,18 +3528,6 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } } -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) -{ - BdrvDirtyBitmap *bitmap; - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - if (!bdrv_dirty_bitmap_enabled(bitmap)) { - continue; - } - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); - } -} - /** * Advance an HBitmapIter to an arbitrary offset. */ diff --git a/include/block/block_int.h b/include/block/block_int.h index 83ed1a5..8996baf 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -662,7 +662,5 @@ bool blk_dev_is_medium_locked(BlockBackend *blk); void blk_dev_resize_cb(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors); #endif /* BLOCK_INT_H */ -- cgit v1.1 From 866323f39d5c7bb053f5e5bf753908ad9f5abec7 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:12 +0800 Subject: qemu-iotests: Make block job methods common Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/041 | 66 ++++++++++--------------------------------- tests/qemu-iotests/iotests.py | 28 ++++++++++++++++++ 2 files changed, 43 insertions(+), 51 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 59a8f73..3d46ed7 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -34,38 +34,8 @@ quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img') quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img') quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img') -class ImageMirroringTestCase(iotests.QMPTestCase): - '''Abstract base class for image mirroring test cases''' - def wait_ready(self, drive='drive0'): - '''Wait until a block job BLOCK_JOB_READY event''' - ready = False - while not ready: - for event in self.vm.get_qmp_events(wait=True): - if event['event'] == 'BLOCK_JOB_READY': - self.assert_qmp(event, 'data/type', 'mirror') - self.assert_qmp(event, 'data/device', drive) - ready = True - - def wait_ready_and_cancel(self, drive='drive0'): - self.wait_ready(drive=drive) - event = self.cancel_and_wait(drive=drive) - self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') - self.assert_qmp(event, 'data/type', 'mirror') - self.assert_qmp(event, 'data/offset', event['data']['len']) - - def complete_and_wait(self, drive='drive0', wait_ready=True): - '''Complete a block job and wait for it to finish''' - if wait_ready: - self.wait_ready(drive=drive) - - result = self.vm.qmp('block-job-complete', device=drive) - self.assert_qmp(result, 'return', {}) - - event = self.wait_until_completed(drive=drive) - self.assert_qmp(event, 'data/type', 'mirror') - -class TestSingleDrive(ImageMirroringTestCase): +class TestSingleDrive(iotests.QMPTestCase): image_len = 1 * 1024 * 1024 # MB def setUp(self): @@ -221,17 +191,9 @@ class TestSingleDriveUnalignedLength(TestSingleDrive): test_small_buffer2 = None test_large_cluster = None -class TestMirrorNoBacking(ImageMirroringTestCase): +class TestMirrorNoBacking(iotests.QMPTestCase): image_len = 2 * 1024 * 1024 # MB - def complete_and_wait(self, drive='drive0', wait_ready=True): - iotests.create_image(target_backing_img, TestMirrorNoBacking.image_len) - return ImageMirroringTestCase.complete_and_wait(self, drive, wait_ready) - - def compare_images(self, img1, img2): - iotests.create_image(target_backing_img, TestMirrorNoBacking.image_len) - return iotests.compare_images(img1, img2) - def setUp(self): iotests.create_image(backing_img, TestMirrorNoBacking.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) @@ -242,7 +204,10 @@ class TestMirrorNoBacking(ImageMirroringTestCase): self.vm.shutdown() os.remove(test_img) os.remove(backing_img) - os.remove(target_backing_img) + try: + os.remove(target_backing_img) + except: + pass os.remove(target_img) def test_complete(self): @@ -257,7 +222,7 @@ class TestMirrorNoBacking(ImageMirroringTestCase): result = self.vm.qmp('query-block') self.assert_qmp(result, 'return[0]/inserted/file', target_img) self.vm.shutdown() - self.assertTrue(self.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, target_img), 'target image does not match source after mirroring') def test_cancel(self): @@ -272,7 +237,7 @@ class TestMirrorNoBacking(ImageMirroringTestCase): result = self.vm.qmp('query-block') self.assert_qmp(result, 'return[0]/inserted/file', test_img) self.vm.shutdown() - self.assertTrue(self.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, target_img), 'target image does not match source after mirroring') def test_large_cluster(self): @@ -283,7 +248,6 @@ class TestMirrorNoBacking(ImageMirroringTestCase): %(TestMirrorNoBacking.image_len), target_backing_img) qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' % (TestMirrorNoBacking.image_len, target_backing_img), target_img) - os.remove(target_backing_img) result = self.vm.qmp('drive-mirror', device='drive0', sync='full', mode='existing', target=target_img) @@ -293,10 +257,10 @@ class TestMirrorNoBacking(ImageMirroringTestCase): result = self.vm.qmp('query-block') self.assert_qmp(result, 'return[0]/inserted/file', target_img) self.vm.shutdown() - self.assertTrue(self.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, target_img), 'target image does not match source after mirroring') -class TestMirrorResized(ImageMirroringTestCase): +class TestMirrorResized(iotests.QMPTestCase): backing_len = 1 * 1024 * 1024 # MB image_len = 2 * 1024 * 1024 # MB @@ -344,7 +308,7 @@ class TestMirrorResized(ImageMirroringTestCase): self.assertTrue(iotests.compare_images(test_img, target_img), 'target image does not match source after mirroring') -class TestReadErrors(ImageMirroringTestCase): +class TestReadErrors(iotests.QMPTestCase): image_len = 2 * 1024 * 1024 # MB # this should be a multiple of twice the default granularity @@ -498,7 +462,7 @@ new_state = "1" self.assert_no_active_block_jobs() self.vm.shutdown() -class TestWriteErrors(ImageMirroringTestCase): +class TestWriteErrors(iotests.QMPTestCase): image_len = 2 * 1024 * 1024 # MB # this should be a multiple of twice the default granularity @@ -624,7 +588,7 @@ new_state = "1" self.assert_no_active_block_jobs() self.vm.shutdown() -class TestSetSpeed(ImageMirroringTestCase): +class TestSetSpeed(iotests.QMPTestCase): image_len = 80 * 1024 * 1024 # MB def setUp(self): @@ -690,7 +654,7 @@ class TestSetSpeed(ImageMirroringTestCase): self.wait_ready_and_cancel() -class TestUnbackedSource(ImageMirroringTestCase): +class TestUnbackedSource(iotests.QMPTestCase): image_len = 2 * 1024 * 1024 # MB def setUp(self): @@ -731,7 +695,7 @@ class TestUnbackedSource(ImageMirroringTestCase): self.complete_and_wait() self.assert_no_active_block_jobs() -class TestRepairQuorum(ImageMirroringTestCase): +class TestRepairQuorum(iotests.QMPTestCase): """ This class test quorum file repair using drive-mirror. It's mostly a fork of TestSingleDrive """ image_len = 1 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 04a294d..63de726 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -326,6 +326,34 @@ class QMPTestCase(unittest.TestCase): self.assert_no_active_block_jobs() return event + def wait_ready(self, drive='drive0'): + '''Wait until a block job BLOCK_JOB_READY event''' + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + ready = True + + def wait_ready_and_cancel(self, drive='drive0'): + self.wait_ready(drive=drive) + event = self.cancel_and_wait(drive=drive) + self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/offset', event['data']['len']) + + def complete_and_wait(self, drive='drive0', wait_ready=True): + '''Complete a block job and wait for it to finish''' + if wait_ready: + self.wait_ready(drive=drive) + + result = self.vm.qmp('block-job-complete', device=drive) + self.assert_qmp(result, 'return', {}) + + event = self.wait_until_completed(drive=drive) + self.assert_qmp(event, 'data/type', 'mirror') + def notrun(reason): '''Skip this test suite''' # Each test in qemu-iotests has a number ("seq") -- cgit v1.1 From c615091793f53ff33b8f6c1b1ba711cf7c93e97b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:13 +0800 Subject: qemu-iotests: Add test case for mirror with unmap This checks that the discard on mirror source that effectively zeroes data is also reflected by the data of target. Signed-off-by: Fam Zheng Reviewed-by: John Snow Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/132 | 59 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/132.out | 5 ++++ tests/qemu-iotests/group | 1 + 3 files changed, 65 insertions(+) create mode 100644 tests/qemu-iotests/132 create mode 100644 tests/qemu-iotests/132.out diff --git a/tests/qemu-iotests/132 b/tests/qemu-iotests/132 new file mode 100644 index 0000000..f53ef6e --- /dev/null +++ b/tests/qemu-iotests/132 @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# +# Test mirror with unmap +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import time +import os +import iotests +from iotests import qemu_img, qemu_io + +test_img = os.path.join(iotests.test_dir, 'test.img') +target_img = os.path.join(iotests.test_dir, 'target.img') + +class TestSingleDrive(iotests.QMPTestCase): + image_len = 2 * 1024 * 1024 # MB + + def setUp(self): + # Write data to the image so we can compare later + qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleDrive.image_len)) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 0 2M', test_img) + + self.vm = iotests.VM().add_drive(test_img, 'discard=unmap') + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + try: + os.remove(target_img) + except OSError: + pass + + def test_mirror_discard(self): + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + self.vm.hmp_qemu_io('drive0', 'discard 0 64k') + self.complete_and_wait('drive0') + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + +if __name__ == '__main__': + iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/132.out b/tests/qemu-iotests/132.out new file mode 100644 index 0000000..ae1213e --- /dev/null +++ b/tests/qemu-iotests/132.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 4597fc1..6206765 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -131,4 +131,5 @@ 129 rw auto quick 130 rw auto quick 131 rw auto quick +132 rw auto quick 134 rw auto quick -- cgit v1.1 From d7b25297920d18fa2a2cde1ed21fde38a88c935f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 8 Jun 2015 13:56:14 +0800 Subject: iotests: Use event_wait in wait_ready Only poll the specific type of event we are interested in, to avoid stealing events that should be consumed by someone else. Suggested-by: John Snow Signed-off-by: Fam Zheng Reviewed-by: John Snow Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/iotests.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 63de726..8615b10 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -328,13 +328,8 @@ class QMPTestCase(unittest.TestCase): def wait_ready(self, drive='drive0'): '''Wait until a block job BLOCK_JOB_READY event''' - ready = False - while not ready: - for event in self.vm.get_qmp_events(wait=True): - if event['event'] == 'BLOCK_JOB_READY': - self.assert_qmp(event, 'data/type', 'mirror') - self.assert_qmp(event, 'data/device', drive) - ready = True + f = {'data': {'type': 'mirror', 'device': drive } } + event = self.vm.event_wait(name='BLOCK_JOB_READY', match=f) def wait_ready_and_cancel(self, drive='drive0'): self.wait_ready(drive=drive) -- cgit v1.1 From 9049736ec70fdc886ac0df521fdd8b2886b2cb63 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Fri, 26 Jun 2015 12:18:01 +0200 Subject: block/iscsi: restore compatiblity with libiscsi 1.9.0 RHEL7 and others are stuck with libiscsi 1.9.0 since there unfortunately was an ABI breakage after that release. Signed-off-by: Peter Lieven Reviewed-by: Paolo Bonzini Message-id: 1435313881-19366-1-git-send-email-pl@kamp.de Signed-off-by: Stefan Hajnoczi --- block/iscsi.c | 32 +++++++++++++++++++++++++++----- configure | 6 +++--- qemu-options.hx | 3 ++- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index a6b8fe2..5002916 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -169,6 +169,19 @@ static inline unsigned exp_random(double mean) return -mean * log((double)rand() / RAND_MAX); } +/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced + * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION + * macro was introduced in 1.11.0. So use the API_VERSION macro as + * a hint that the macros are defined and define them ourselves + * otherwise to keep the required libiscsi version at 1.9.0 */ +#if !defined(LIBISCSI_API_VERSION) +#define QEMU_SCSI_STATUS_TASK_SET_FULL 0x28 +#define QEMU_SCSI_STATUS_TIMEOUT 0x0f000002 +#else +#define QEMU_SCSI_STATUS_TASK_SET_FULL SCSI_STATUS_TASK_SET_FULL +#define QEMU_SCSI_STATUS_TIMEOUT SCSI_STATUS_TIMEOUT +#endif + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -189,11 +202,12 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, iTask->do_retry = 1; goto out; } - if (status == SCSI_STATUS_BUSY || status == SCSI_STATUS_TIMEOUT || - status == SCSI_STATUS_TASK_SET_FULL) { + if (status == SCSI_STATUS_BUSY || + status == QEMU_SCSI_STATUS_TIMEOUT || + status == QEMU_SCSI_STATUS_TASK_SET_FULL) { unsigned retry_time = exp_random(iscsi_retry_times[iTask->retries - 1]); - if (status == SCSI_STATUS_TIMEOUT) { + if (status == QEMU_SCSI_STATUS_TIMEOUT) { /* make sure the request is rescheduled AFTER the * reconnect is initiated */ retry_time = EVENT_INTERVAL * 2; @@ -1355,7 +1369,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; const char *filename; - int i, ret = 0; + int i, ret = 0, timeout = 0; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -1425,7 +1439,15 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } - iscsi_set_timeout(iscsi, parse_timeout(iscsi_url->target)); + /* timeout handling is broken in libiscsi before 1.15.0 */ + timeout = parse_timeout(iscsi_url->target); +#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621 + iscsi_set_timeout(iscsi, timeout); +#else + if (timeout) { + error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0"); + } +#endif if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) { error_setg(errp, "iSCSI: Failed to connect to LUN : %s", diff --git a/configure b/configure index 08f9a22..3063739 100755 --- a/configure +++ b/configure @@ -3618,15 +3618,15 @@ if compile_prog "" "" ; then fi ########################################## -# Do we have libiscsi >= 1.10.0 +# Do we have libiscsi >= 1.9.0 if test "$libiscsi" != "no" ; then - if $pkg_config --atleast-version=1.10.0 libiscsi; then + if $pkg_config --atleast-version=1.9.0 libiscsi; then libiscsi="yes" libiscsi_cflags=$($pkg_config --cflags libiscsi) libiscsi_libs=$($pkg_config --libs libiscsi) else if test "$libiscsi" = "yes" ; then - feature_not_found "libiscsi" "Install libiscsi >= 1.10.0" + feature_not_found "libiscsi" "Install libiscsi >= 1.9.0" fi libiscsi="no" fi diff --git a/qemu-options.hx b/qemu-options.hx index 0bdd59f..7b8efbf 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2296,7 +2296,8 @@ line or a configuration file. Since version Qemu 2.4 it is possible to specify a iSCSI request timeout to detect stalled requests and force a reestablishment of the session. The timeout -is specified in seconds. The default is 0 which means no timeout. +is specified in seconds. The default is 0 which means no timeout. Libiscsi +1.15.0 or greater is required for this feature. Example (without authentication): @example -- cgit v1.1 From 29c838cdc96c4d117f00c75bbcb941e1be9590fb Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Fri, 26 Jun 2015 13:14:01 +0200 Subject: block/nfs: limit maximum readahead size to 1MB a malicious caller could otherwise specify a very large value via the URI and force libnfs to allocate a large amount of memory for the readahead buffer. Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Message-id: 1435317241-25585-1-git-send-email-pl@kamp.de Signed-off-by: Stefan Hajnoczi --- block/nfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/nfs.c b/block/nfs.c index ca9e24e..c026ff6 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -35,6 +35,8 @@ #include "sysemu/sysemu.h" #include +#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 + typedef struct NFSClient { struct nfs_context *context; struct nfsfh *fh; @@ -327,6 +329,11 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, nfs_set_tcp_syncnt(client->context, val); #ifdef LIBNFS_FEATURE_READAHEAD } else if (!strcmp(qp->p[i].name, "readahead")) { + if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { + error_report("NFS Warning: Truncating NFS readahead" + " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); + val = QEMU_NFS_MAX_READAHEAD_SIZE; + } nfs_set_readahead(client->context, val); #endif } else { -- cgit v1.1 From 764ba3ae511adddfa750db290ac8375d660ca5b9 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 29 Jun 2015 16:12:13 +0300 Subject: block: remove redundant check before g_slist_find() An empty GSList is represented by a NULL pointer, therefore it's a perfectly valid argument for g_slist_find() and there's no need to make any additional check. Signed-off-by: Alberto Garcia Message-id: 1435583533-5758-1-git-send-email-berto@igalia.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/io.c b/block/io.c index ad31822..305e0d9 100644 --- a/block/io.c +++ b/block/io.c @@ -283,7 +283,7 @@ void bdrv_drain_all(void) } aio_context_release(aio_context); - if (!aio_ctxs || !g_slist_find(aio_ctxs, aio_context)) { + if (!g_slist_find(aio_ctxs, aio_context)) { aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); } } -- cgit v1.1