author    John Levon <john.levon@nutanix.com>  2022-05-30 09:41:32 +0100
committer GitHub <noreply@github.com>          2022-05-30 09:41:32 +0100
commit    e036ac145acea1a5aa77879e978ac2fff909a657 (patch)
tree      1f0837b4c79feb97aa642d4e505e3d64012896d7 /lib
parent    79e83e482d4eb0b7a07cfa207506d33edf05d04b (diff)
allow concurrent dirty bitmap get (#677)
Use atomic operations to allow concurrent bitmap updates with
VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP operations.

Dirtying clients can race against each other, so we must use an atomic
OR when marking dirty: we do this byte by byte. When reading the dirty
bitmap, we must be careful not to race and lose any set bits within the
same byte. If we miss an update, we'll catch it the next time around,
presuming that before the final pass we'll have quiesced all I/O.

Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
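The pattern is easiest to see outside the diff. Below is a minimal
standalone sketch of the scheme the commit message describes; the names
(mark_dirty, collect_dirty) and the fixed-size bitmap are invented for
illustration and are not the library's API:

    #include <limits.h>
    #include <stddef.h>
    #include <stdint.h>

    #define BITMAP_BYTES 64

    static uint8_t dirty[BITMAP_BYTES];

    /* Writer side: set a page's bit without losing concurrent updates. */
    static void
    mark_dirty(size_t page)
    {
        uint8_t bit = 1 << (page % CHAR_BIT);

        __atomic_or_fetch(&dirty[page / CHAR_BIT], bit, __ATOMIC_SEQ_CST);
    }

    /* Reader side: snapshot and clear in one step, byte by byte. */
    static void
    collect_dirty(uint8_t *out)
    {
        size_t i;

        for (i = 0; i < BITMAP_BYTES; i++) {
            if (dirty[i] == 0) {
                /* Racy fast path: a bit set right after this load is
                 * simply picked up on the next collection pass. */
                out[i] = 0;
            } else {
                /* Atomic exchange with zero cannot drop bits set by a
                 * concurrent mark_dirty() on the same byte. */
                out[i] = __atomic_exchange_n(&dirty[i], 0, __ATOMIC_SEQ_CST);
            }
        }
    }

    int
    main(void)
    {
        uint8_t snap[BITMAP_BYTES];

        mark_dirty(9);            /* page 9 -> byte 1, bit 1 */
        collect_dirty(snap);
        return snap[1] == 0x02 ? 0 : 1;
    }

The design choice is one of granularity: per-byte atomics keep the
writer's fast path cheap while still letting the reader snapshot and
clear the bitmap without taking a lock.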
Diffstat (limited to 'lib')
-rw-r--r--  lib/dma.c           34
-rw-r--r--  lib/dma.h           57
-rw-r--r--  lib/libvfio-user.c  11
3 files changed, 87 insertions, 15 deletions
diff --git a/lib/dma.c b/lib/dma.c
index 5ca897f..ac3ddfe 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -31,7 +31,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <stdio.h>
#include <sys/param.h>
#include <stddef.h>
@@ -281,7 +280,8 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize)
if (size < 0) {
return size;
}
- region->dirty_bitmap = calloc(size, sizeof(char));
+
+ region->dirty_bitmap = calloc(size, 1);
if (region->dirty_bitmap == NULL) {
return ERROR_INT(errno);
}
@@ -553,10 +553,11 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
uint64_t len, size_t pgsize, size_t size,
char *bitmap)
{
- int ret;
+ dma_memory_region_t *region;
ssize_t bitmap_size;
dma_sg_t sg;
- dma_memory_region_t *region;
+ size_t i;
+ int ret;
assert(dma != NULL);
assert(bitmap != NULL);
@@ -599,11 +600,32 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
return ERROR_INT(EINVAL);
}
- memcpy(bitmap, region->dirty_bitmap, size);
+ for (i = 0; i < (size_t)bitmap_size; i++) {
+ uint8_t val = region->dirty_bitmap[i];
+ uint8_t *outp = (uint8_t *)&bitmap[i];
+
+ /*
+ * If no bits are dirty, avoid the atomic exchange. This is obviously
+ * racy, but it's OK: if we miss a dirty bit being set, we'll catch it
+ * the next time around.
+ *
+ * Otherwise, atomically exchange the dirty bits with zero: as we use
+ * atomic or in _dma_mark_dirty(), this cannot lose set bits - we might
+ * miss a bit being set after, but again, we'll catch that next time
+ * around.
+ */
+ if (val == 0) {
+ *outp = 0;
+ } else {
+ uint8_t zero = 0;
+ __atomic_exchange(&region->dirty_bitmap[i], &zero,
+ outp, __ATOMIC_SEQ_CST);
+ }
+ }
+
#ifdef DEBUG
log_dirty_bitmap(dma->vfu_ctx, region, bitmap, size);
#endif
- memset(region->dirty_bitmap, 0, size);
return 0;
}
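A side note on the builtin used above: __atomic_exchange() is the
generic four-argument form of the GCC/Clang exchange builtin; it stores
the new value through the first pointer and writes the old contents
through the third. A value-returning equivalent, shown here as a
hypothetical helper rather than anything in the library, may read more
directly:

    #include <stdint.h>

    /* Atomically fetch a bitmap byte and reset it to zero; equivalent
     * to the __atomic_exchange() call in the hunk above. */
    static uint8_t
    take_and_clear(uint8_t *byte)
    {
        return __atomic_exchange_n(byte, 0, __ATOMIC_SEQ_CST);
    }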
diff --git a/lib/dma.h b/lib/dma.h
index 3fdbd65..089f973 100644
--- a/lib/dma.h
+++ b/lib/dma.h
@@ -58,6 +58,7 @@
* effectively a no-op.
*/
+#include <stdio.h>
#ifdef DMA_MAP_PROTECTED
#undef DMA_MAP_FAST
#define DMA_MAP_FAST_IMPL 0
@@ -95,7 +96,7 @@ typedef struct {
vfu_dma_info_t info;
int fd; // File descriptor to mmap
off_t offset; // File offset
- char *dirty_bitmap; // Dirty page bitmap
+ uint8_t *dirty_bitmap; // Dirty page bitmap
} dma_memory_region_t;
typedef struct dma_controller {
@@ -140,22 +141,64 @@ _dma_addr_sg_split(const dma_controller_t *dma,
vfu_dma_addr_t dma_addr, uint64_t len,
dma_sg_t *sg, int max_nr_sgs, int prot);
-static void
+/* Convert a start address and length to its containing page numbers. */
+static inline void
+range_to_pages(size_t start, size_t len, size_t pgsize,
+ size_t *pgstart, size_t *pgend)
+{
+ *pgstart = start / pgsize;
+ *pgend = ROUND_UP(start + len, pgsize) / pgsize;
+}
+
+/* Given a bit position, return the containing byte. */
+static inline size_t
+bit_to_u8(size_t val)
+{
+ return val / (CHAR_BIT);
+}
+
+/* Return a value modulo the bitsize of a uint8_t. */
+static inline size_t
+bit_to_u8off(size_t val)
+{
+ return val % (CHAR_BIT);
+}
+
+static inline void
_dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region,
dma_sg_t *sg)
{
- size_t i, start, end;
+ size_t index;
+ size_t end;
+ size_t pgstart;
+ size_t pgend;
+ size_t i;
assert(dma != NULL);
assert(region != NULL);
assert(sg != NULL);
assert(region->dirty_bitmap != NULL);
- start = sg->offset / dma->dirty_pgsize;
- end = start + (sg->length / dma->dirty_pgsize) + (sg->length % dma->dirty_pgsize != 0) - 1;
+ range_to_pages(sg->offset, sg->length, dma->dirty_pgsize,
+ &pgstart, &pgend);
+
+ index = bit_to_u8(pgstart);
+ end = bit_to_u8(pgend) + !!(bit_to_u8off(pgend));
+
+ for (i = index; i < end; i++) {
+ uint8_t bm = ~0;
+
+ /* Mask off any pages in the first u8 that aren't in the range. */
+ if (i == index && bit_to_u8off(pgstart) != 0) {
+ bm &= ~((1 << bit_to_u8off(pgstart)) - 1);
+ }
+
+ /* Mask off any pages in the last u8 that aren't in the range. */
+ if (i == end - 1 && bit_to_u8off(pgend) != 0) {
+ bm &= ((1 << bit_to_u8off(pgend)) - 1);
+ }
- for (i = start; i <= end; i++) {
- region->dirty_bitmap[i / CHAR_BIT] |= 1 << (i % CHAR_BIT);
+ __atomic_or_fetch(&region->dirty_bitmap[i], bm, __ATOMIC_SEQ_CST);
}
}
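To make the masking in _dma_mark_dirty() concrete, here is a worked
example with invented numbers: a 4 KiB dirty page size and an sg
spanning pages 3 through 7. ROUND_UP is redefined locally to mirror the
library's macro:

    #include <assert.h>
    #include <limits.h>
    #include <stddef.h>
    #include <stdint.h>

    #define ROUND_UP(x, n) (((x) + (n) - 1) / (n) * (n))

    int
    main(void)
    {
        size_t pgsize = 0x1000;              /* 4 KiB dirty pages */
        size_t start = 0x3000, len = 0x5000; /* pages 3..7 inclusive */

        size_t pgstart = start / pgsize;                       /* 3 */
        size_t pgend = ROUND_UP(start + len, pgsize) / pgsize; /* 8 */

        size_t index = pgstart / CHAR_BIT;    /* first bitmap byte: 0 */
        size_t end = pgend / CHAR_BIT + !!(pgend % CHAR_BIT); /* 1 */

        uint8_t bm = ~0;                                    /* 0xff */
        /* Clear bits 0..2: pages 0..2 are below the range. */
        bm &= ~((1 << (pgstart % CHAR_BIT)) - 1);           /* 0xf8 */
        /* pgend % CHAR_BIT == 0, so no trailing mask is needed. */

        assert(index == 0 && end == 1 && bm == 0xf8);
        return 0;
    }

A single __atomic_or_fetch() of 0xf8 into byte 0 then marks pages 3
through 7 dirty in one operation.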
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 90c4b39..566ece0 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -1310,8 +1310,15 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg)
case VFIO_USER_DEVICE_RESET:
return true;
- case VFIO_USER_DIRTY_PAGES:
- return true;
+ case VFIO_USER_DIRTY_PAGES: {
+ struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base;
+
+ if (msg->in.iov.iov_len < sizeof(*dirty_pages)) {
+ return false;
+ }
+
+ return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP);
+ }
case VFIO_USER_REGION_WRITE:
if (msg->in.iov.iov_len < sizeof(*reg)) {