diff options
author | John Levon <john.levon@nutanix.com> | 2022-05-30 09:41:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-30 09:41:32 +0100 |
commit | e036ac145acea1a5aa77879e978ac2fff909a657 (patch) | |
tree | 1f0837b4c79feb97aa642d4e505e3d64012896d7 /lib | |
parent | 79e83e482d4eb0b7a07cfa207506d33edf05d04b (diff) | |
download | libvfio-user-e036ac145acea1a5aa77879e978ac2fff909a657.zip libvfio-user-e036ac145acea1a5aa77879e978ac2fff909a657.tar.gz libvfio-user-e036ac145acea1a5aa77879e978ac2fff909a657.tar.bz2 |
allow concurrent dirty bitmap get (#677)
Use atomic operations to allow concurrent bitmap updates with
VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP operations.
Dirtying clients can race against each other, so we must use an atomic OR
when marking pages dirty: we do this byte-by-byte.
When reading the dirty bitmap, we must be careful not to race and lose
any set bits within the same byte. If we miss an update, we'll catch it
the next time around, presuming that all I/O will have been quiesced
before the final pass.
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dma.c | 34 | ||||
-rw-r--r-- | lib/dma.h | 57 | ||||
-rw-r--r-- | lib/libvfio-user.c | 11 |
3 files changed, 87 insertions, 15 deletions
@@ -31,7 +31,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include <stdio.h> #include <sys/param.h> #include <stddef.h> @@ -281,7 +280,8 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) if (size < 0) { return size; } - region->dirty_bitmap = calloc(size, sizeof(char)); + + region->dirty_bitmap = calloc(size, 1); if (region->dirty_bitmap == NULL) { return ERROR_INT(errno); } @@ -553,10 +553,11 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, uint64_t len, size_t pgsize, size_t size, char *bitmap) { - int ret; + dma_memory_region_t *region; ssize_t bitmap_size; dma_sg_t sg; - dma_memory_region_t *region; + size_t i; + int ret; assert(dma != NULL); assert(bitmap != NULL); @@ -599,11 +600,32 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, return ERROR_INT(EINVAL); } - memcpy(bitmap, region->dirty_bitmap, size); + for (i = 0; i < (size_t)bitmap_size; i++) { + uint8_t val = region->dirty_bitmap[i]; + uint8_t *outp = (uint8_t *)&bitmap[i]; + + /* + * If no bits are dirty, avoid the atomic exchange. This is obviously + * racy, but it's OK: if we miss a dirty bit being set, we'll catch it + * the next time around. + * + * Otherwise, atomically exchange the dirty bits with zero: as we use + * atomic or in _dma_mark_dirty(), this cannot lose set bits - we might + * miss a bit being set after, but again, we'll catch that next time + * around. + */ + if (val == 0) { + *outp = 0; + } else { + uint8_t zero = 0; + __atomic_exchange(®ion->dirty_bitmap[i], &zero, + outp, __ATOMIC_SEQ_CST); + } + } + #ifdef DEBUG log_dirty_bitmap(dma->vfu_ctx, region, bitmap, size); #endif - memset(region->dirty_bitmap, 0, size); return 0; } @@ -58,6 +58,7 @@ * effectively a no-op. 
*/ +#include <stdio.h> #ifdef DMA_MAP_PROTECTED #undef DMA_MAP_FAST #define DMA_MAP_FAST_IMPL 0 @@ -95,7 +96,7 @@ typedef struct { vfu_dma_info_t info; int fd; // File descriptor to mmap off_t offset; // File offset - char *dirty_bitmap; // Dirty page bitmap + uint8_t *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; typedef struct dma_controller { @@ -140,22 +141,64 @@ _dma_addr_sg_split(const dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t len, dma_sg_t *sg, int max_nr_sgs, int prot); -static void +/* Convert a start address and length to its containing page numbers. */ +static inline void +range_to_pages(size_t start, size_t len, size_t pgsize, + size_t *pgstart, size_t *pgend) +{ + *pgstart = start / pgsize; + *pgend = ROUND_UP(start + len, pgsize) / pgsize; +} + +/* Given a bit position, return the containing byte. */ +static inline size_t +bit_to_u8(size_t val) +{ + return val / (CHAR_BIT); +} + +/* Return a value modulo the bitsize of a uint8_t. */ +static inline size_t +bit_to_u8off(size_t val) +{ + return val % (CHAR_BIT); +} + +static inline void _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region, dma_sg_t *sg) { - size_t i, start, end; + size_t index; + size_t end; + size_t pgstart; + size_t pgend; + size_t i; assert(dma != NULL); assert(region != NULL); assert(sg != NULL); assert(region->dirty_bitmap != NULL); - start = sg->offset / dma->dirty_pgsize; - end = start + (sg->length / dma->dirty_pgsize) + (sg->length % dma->dirty_pgsize != 0) - 1; + range_to_pages(sg->offset, sg->length, dma->dirty_pgsize, + &pgstart, &pgend); + + index = bit_to_u8(pgstart); + end = bit_to_u8(pgend) + !!(bit_to_u8off(pgend)); + + for (i = index; i < end; i++) { + uint8_t bm = ~0; + + /* Mask off any pages in the first u8 that aren't in the range. */ + if (i == index && bit_to_u8off(pgstart) != 0) { + bm &= ~((1 << bit_to_u8off(pgstart)) - 1); + } + + /* Mask off any pages in the last u8 that aren't in the range. 
*/ + if (i == end - 1 && bit_to_u8off(pgend) != 0) { + bm &= ((1 << bit_to_u8off(pgend)) - 1); + } - for (i = start; i <= end; i++) { - region->dirty_bitmap[i / CHAR_BIT] |= 1 << (i % CHAR_BIT); + __atomic_or_fetch(®ion->dirty_bitmap[i], bm, __ATOMIC_SEQ_CST); } } diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 90c4b39..566ece0 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -1310,8 +1310,15 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) case VFIO_USER_DEVICE_RESET: return true; - case VFIO_USER_DIRTY_PAGES: - return true; + case VFIO_USER_DIRTY_PAGES: { + struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; + + if (msg->in.iov.iov_len < sizeof(*dirty_pages)) { + return false; + } + + return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP); + } case VFIO_USER_REGION_WRITE: if (msg->in.iov.iov_len < sizeof(*reg)) { |