diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | block/block-backend.c | 5 | ||||
-rw-r--r-- | block/io.c | 10 | ||||
-rw-r--r-- | block/mirror.c | 6 | ||||
-rw-r--r-- | docs/specs/parallels.txt | 228 | ||||
-rw-r--r-- | hw/block/virtio-blk.c | 28 | ||||
-rw-r--r-- | hw/sd/sdhci.c | 10 | ||||
-rw-r--r-- | include/block/block_int.h | 3 | ||||
-rw-r--r-- | include/hw/sd/sdhci.h | 1 | ||||
-rw-r--r-- | include/sysemu/block-backend.h | 1 | ||||
-rw-r--r-- | scripts/qemugdb/mtree.py | 10 |
11 files changed, 272 insertions, 31 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 55a0fd8..46bba68 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1490,6 +1490,7 @@ M: Denis V. Lunev <den@openvz.org> L: qemu-block@nongnu.org S: Supported F: block/parallels.c +F: docs/specs/parallels.txt qed M: Stefan Hajnoczi <stefanha@redhat.com> diff --git a/block/block-backend.c b/block/block-backend.c index 36ccc9e..f41d326 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1023,6 +1023,11 @@ int blk_get_max_transfer_length(BlockBackend *blk) } } +int blk_get_max_iov(BlockBackend *blk) +{ + return blk->bs->bl.max_iov; +} + void blk_set_guest_block_size(BlockBackend *blk, int align) { blk->guest_block_size = align; @@ -166,9 +166,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length; bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment; bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment; + bs->bl.max_iov = bs->file->bs->bl.max_iov; } else { bs->bl.min_mem_alignment = 512; bs->bl.opt_mem_alignment = getpagesize(); + + /* Safe default since most protocols use readv()/writev()/etc */ + bs->bl.max_iov = IOV_MAX; } if (bs->backing) { @@ -189,6 +193,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.min_mem_alignment = MAX(bs->bl.min_mem_alignment, bs->backing->bs->bl.min_mem_alignment); + bs->bl.max_iov = + MIN(bs->bl.max_iov, + bs->backing->bs->bl.max_iov); } /* Then let the driver override it */ @@ -1882,7 +1889,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, merge = 1; } - if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { + if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > + bs->bl.max_iov) { merge = 0; } diff --git a/block/mirror.c b/block/mirror.c index fc34a9c..f201f2b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -161,13 +161,15 @@ static void mirror_read_complete(void *opaque, int ret) static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->common.bs; - int nb_sectors, sectors_per_chunk, nb_chunks; + int nb_sectors, sectors_per_chunk, nb_chunks, max_iov; int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; uint64_t delay_ns = 0; MirrorOp *op; int pnum; int64_t ret; + max_iov = MIN(source->bl.max_iov, s->target->bl.max_iov); + s->sector_num = hbitmap_iter_next(&s->hbi); if (s->sector_num < 0) { bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); @@ -248,7 +250,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); break; } - if (IOV_MAX < nb_chunks + added_chunks) { + if (max_iov < nb_chunks + added_chunks) { trace_mirror_break_iov_max(s, nb_chunks, added_chunks); break; } diff --git a/docs/specs/parallels.txt b/docs/specs/parallels.txt new file mode 100644 index 0000000..b4fe229 --- /dev/null +++ b/docs/specs/parallels.txt @@ -0,0 +1,228 @@ += License = + +Copyright (c) 2015 Denis Lunev +Copyright (c) 2015 Vladimir Sementsov-Ogievskiy + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + += Parallels Expandable Image File Format = + +A Parallels expandable image file consists of three consecutive parts: + * header + * BAT + * data area + +All numbers in a Parallels expandable image are stored in little-endian byte +order. + + +== Definitions == + + Sector A 512-byte data chunk. + + Cluster A data chunk of the size specified in the image header. + Currently, the default size is 1MiB (2048 sectors). In previous + versions, cluster sizes of 63 sectors, 256 and 252 kilobytes were + used. + + BAT Block Allocation Table, an entity that contains information for + guest-to-host I/O data address translation. 
+ + +== Header == + +The header is placed at the start of an image and contains the following +fields: + +Bytes: + 0 - 15: magic + Must contain "WithoutFreeSpace" or "WithouFreSpacExt". + + 16 - 19: version + Must be 2. + + 20 - 23: heads + Disk geometry parameter for guest. + + 24 - 27: cylinders + Disk geometry parameter for guest. + + 28 - 31: tracks + Cluster size, in sectors. + + 32 - 35: nb_bat_entries + Disk size, in clusters (BAT size). + + 36 - 43: nb_sectors + Disk size, in sectors. + + For "WithoutFreeSpace" images: + Only the lowest 4 bytes are used. The highest 4 bytes must be + cleared in this case. + + For "WithouFreSpacExt" images, there are no such + restrictions. + + 44 - 47: in_use + Set to 0x746F6E59 when the image is opened by software in R/W + mode; set to 0x312e3276 when the image is closed. + + A zero in this field means that the image was opened by an old + version of the software that doesn't support Format Extension + (see below). + + Other values are not allowed. + + 48 - 51: data_off + An offset, in sectors, from the start of the file to the start of + the data area. + + For "WithoutFreeSpace" images: + - If data_off is zero, the offset is calculated as the end of BAT + table plus some padding to ensure sector size alignment. + - If data_off is non-zero, the offset should be aligned to sector + size. However it is recommended to align it to cluster size for + newly created images. + + For "WithouFreSpacExt" images: + data_off must be non-zero and aligned to cluster size. + + 52 - 55: flags + Miscellaneous flags. + + Bit 0: Empty Image bit. If set, the image should be + considered clear. + + Bits 2-31: Unused. + + 56 - 63: ext_off + Format Extension offset, an offset, in sectors, from the start of + the file to the start of the Format Extension Cluster. + + ext_off must meet the same requirements as cluster offsets + defined by BAT entries (see below). + + +== BAT == + +BAT is placed immediately after the image header. 
In the file, BAT is a +contiguous array of 32-bit unsigned little-endian integers with +(nb_bat_entries * 4) bytes size. + +Each BAT entry contains an offset from the start of the file to the +corresponding cluster. The offset is set in clusters for "WithouFreSpacExt" images +and in sectors for "WithoutFreeSpace" images. + +If a BAT entry is zero, the corresponding cluster is not allocated and should +be considered as filled with zeroes. + +Cluster offsets specified by BAT entries must meet the following requirements: + - the value must not be lower than data offset (provided by header.data_off + or calculated as specified above), + - the value must be lower than the desired file size, + - the value must be unique among all BAT entries, + - the result of (cluster offset - data offset) must be aligned to cluster + size. + + +== Data Area == + +The data area is an area from the data offset (provided by header.data_off or +calculated as specified above) to the end of the file. It represents a +contiguous array of clusters. Most of them are allocated by the BAT, some may +be allocated by the ext_off field in the header while others may be allocated by +extensions. All clusters allocated by ext_off and extensions should meet the +same requirements as clusters specified by BAT entries. + + +== Format Extension == + +The Format Extension is an area 1 cluster in size that provides additional +format features. This cluster is addressed by the ext_off field in the header. +The format of the Format Extension area is the following: + + 0 - 7: magic + Must be 0xAB234CEF23DCEA87 + + 8 - 23: m_CheckSum + The MD5 checksum of the entire Format Extension cluster except + the first 24 bytes. + + The above are followed by feature sections or "extensions". The last + extension must be "End of features" (see below). 
+ +Each feature section has the following format: + + 0 - 7: magic + The identifier of the feature: + 0x0000000000000000 - End of features + 0x20385FAE252CB34A - Dirty bitmap + + 8 - 15: flags + External flags for extension: + + Bit 0: NECESSARY + If the software cannot load the extension (due to an + unknown magic number or error), the file should not be + changed. If this flag is unset and there is an error on + loading the extension, said extension should be dropped. + + Bit 1: TRANSIT + If there is an unknown extension with this flag set, + said extension should be left as is. + + If neither NECESSARY nor TRANSIT are set, the extension should be + dropped. + + 16 - 19: data_size + The size of the following feature data, in bytes. + + 20 - 23: unused32 + Align header to 8 bytes boundary. + + variable: data (data_size bytes) + + The above is followed by padding to the next 8 bytes boundary, then the + next extension starts. + + The last extension must be "End of features" with all the fields set to 0. + + +=== Dirty bitmaps feature === + +This feature provides a way of storing dirty bitmaps in the image. The fields +of its data area are: + + 0 - 7: size + The bitmap size, should be equal to disk size in sectors. + + 8 - 23: id + An identifier for backup consistency checking. + + 24 - 27: granularity + Bitmap granularity, in sectors. I.e., the number of sectors + corresponding to one bit of the bitmap. Granularity must be + a power of 2. + + 28 - 31: l1_size + The number of entries in the L1 table of the bitmap. + + variable: l1 (64 * l1_size bytes) + L1 offset table (in bytes) + +A dirty bitmap is stored using a one-level structure for the mapping to host +clusters - an L1 table. + +Given an offset in bytes into the bitmap data, the offset in bytes into the +image file can be obtained as follows: + + offset = l1_table[offset / cluster_size] + (offset % cluster_size) + +If an L1 table entry is 0, the corresponding cluster of the bitmap is assumed +to be zero. 
+ +If an L1 table entry is 1, the corresponding cluster of the bitmap is assumed +to have all bits set. + +If an L1 table entry is not 0 or 1, it allocates a cluster from the data area. diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b88b726..51f867b 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -407,24 +407,16 @@ void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) for (i = 0; i < mrb->num_reqs; i++) { VirtIOBlockReq *req = mrb->reqs[i]; if (num_reqs > 0) { - bool merge = true; - - /* merge would exceed maximum number of IOVs */ - if (niov + req->qiov.niov > IOV_MAX) { - merge = false; - } - - /* merge would exceed maximum transfer length of backend device */ - if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) { - merge = false; - } - - /* requests are not sequential */ - if (sector_num + nb_sectors != req->sector_num) { - merge = false; - } - - if (!merge) { + /* + * NOTE: We cannot merge the requests in below situations: + * 1. requests are not sequential + * 2. merge would exceed maximum number of IOVs + * 3. 
merge would exceed maximum transfer length of backend device + */ + if (sector_num + nb_sectors != req->sector_num || + niov > blk_get_max_iov(blk) - req->qiov.niov || + req->qiov.size / BDRV_SECTOR_SIZE > max_xfer_len || + nb_sectors > max_xfer_len - req->qiov.size / BDRV_SECTOR_SIZE) { submit_requests(blk, mrb, start, num_reqs, niov); num_reqs = 0; } diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 8612760..7acb4d7 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -193,7 +193,9 @@ static void sdhci_reset(SDHCIState *s) * initialization */ memset(&s->sdmasysad, 0, (uintptr_t)&s->capareg - (uintptr_t)&s->sdmasysad); - sd_set_cb(s->card, s->ro_cb, s->eject_cb); + if (!s->noeject_quirk) { + sd_set_cb(s->card, s->ro_cb, s->eject_cb); + } s->data_count = 0; s->stopped_state = sdhc_not_stopped; } @@ -243,9 +245,6 @@ static void sdhci_send_command(SDHCIState *s) (s->cmdreg & SDHC_CMD_RESPONSE) == SDHC_CMD_RSP_WITH_BUSY) { s->norintsts |= SDHC_NIS_TRSCMP; } - } else if (rlen != 0 && (s->errintstsen & SDHC_EISEN_CMDIDX)) { - s->errintsts |= SDHC_EIS_CMDIDX; - s->norintsts |= SDHC_NIS_ERR; } if (s->norintstsen & SDHC_NISEN_CMDCMP) { @@ -831,7 +830,7 @@ static void sdhci_data_transfer(void *opaque) static bool sdhci_can_issue_command(SDHCIState *s) { - if (!SDHC_CLOCK_IS_ON(s->clkcon) || !(s->pwrcon & SDHC_POWER_ON) || + if (!SDHC_CLOCK_IS_ON(s->clkcon) || (((s->prnsts & SDHC_DATA_INHIBIT) || s->stopped_state) && ((s->cmdreg & SDHC_CMD_DATA_PRESENT) || ((s->cmdreg & SDHC_CMD_RESPONSE) == SDHC_CMD_RSP_WITH_BUSY && @@ -1279,6 +1278,7 @@ static Property sdhci_sysbus_properties[] = { DEFINE_PROP_UINT32("capareg", SDHCIState, capareg, SDHC_CAPAB_REG_DEFAULT), DEFINE_PROP_UINT32("maxcurr", SDHCIState, maxcurr, 0), + DEFINE_PROP_BOOL("noeject-quirk", SDHCIState, noeject_quirk, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 9a1c466..256609d 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -330,6 
+330,9 @@ typedef struct BlockLimits { /* memory alignment for bounce buffer */ size_t opt_mem_alignment; + + /* maximum number of iovec elements */ + int max_iov; } BlockLimits; typedef struct BdrvOpBlocker BdrvOpBlocker; diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h index e78d938..ffd1f80 100644 --- a/include/hw/sd/sdhci.h +++ b/include/hw/sd/sdhci.h @@ -77,6 +77,7 @@ typedef struct SDHCIState { uint32_t buf_maxsz; uint16_t data_count; /* current element in FIFO buffer */ uint8_t stopped_state;/* Current SDHC state */ + bool noeject_quirk;/* Quirk to disable card insert/remove interrupts */ /* Buffer Data Port Register - virtual access point to R and W buffers */ /* Software Reset Register - always reads as 0 */ /* Force Event Auto CMD12 Error Interrupt Reg - write only */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index fb068ea4..dc24476 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -146,6 +146,7 @@ void blk_lock_medium(BlockBackend *blk, bool locked); void blk_eject(BlockBackend *blk, bool eject_flag); int blk_get_flags(BlockBackend *blk); int blk_get_max_transfer_length(BlockBackend *blk); +int blk_get_max_iov(BlockBackend *blk); void blk_set_guest_block_size(BlockBackend *blk, int align); void *blk_blockalign(BlockBackend *blk, size_t size); bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); diff --git a/scripts/qemugdb/mtree.py b/scripts/qemugdb/mtree.py index 06011c3..cc8131c 100644 --- a/scripts/qemugdb/mtree.py +++ b/scripts/qemugdb/mtree.py @@ -21,7 +21,7 @@ def isnull(ptr): return ptr == gdb.Value(0).cast(ptr.type) def int128(p): - return long(p['lo']) + (long(p['hi']) << 64) + return int(p['lo']) + (int(p['hi']) << 64) class MtreeCommand(gdb.Command): '''Display the memory tree hierarchy''' @@ -40,11 +40,11 @@ class MtreeCommand(gdb.Command): def process_queue(self): while self.queue: ptr = self.queue.pop(0) - if long(ptr) in self.seen: + if 
int(ptr) in self.seen: continue self.print_item(ptr) def print_item(self, ptr, offset = gdb.Value(0), level = 0): - self.seen.add(long(ptr)) + self.seen.add(int(ptr)) addr = ptr['addr'] addr += offset size = int128(ptr['size']) @@ -58,8 +58,8 @@ class MtreeCommand(gdb.Command): klass = ' (RAM)' gdb.write('%s%016x-%016x %s%s (@ %s)\n' % (' ' * level, - long(addr), - long(addr + (size - 1)), + int(addr), + int(addr + (size - 1)), ptr['name'].string(), klass, ptr, |