diff options
-rw-r--r-- | lib/dma.c | 34 | ||||
-rw-r--r-- | lib/dma.h | 57 | ||||
-rw-r--r-- | lib/libvfio-user.c | 11 | ||||
-rw-r--r-- | test/py/test_dirty_pages.py | 104 |
4 files changed, 161 insertions, 45 deletions
@@ -31,7 +31,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include <stdio.h> #include <sys/param.h> #include <stddef.h> @@ -281,7 +280,8 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) if (size < 0) { return size; } - region->dirty_bitmap = calloc(size, sizeof(char)); + + region->dirty_bitmap = calloc(size, 1); if (region->dirty_bitmap == NULL) { return ERROR_INT(errno); } @@ -553,10 +553,11 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, uint64_t len, size_t pgsize, size_t size, char *bitmap) { - int ret; + dma_memory_region_t *region; ssize_t bitmap_size; dma_sg_t sg; - dma_memory_region_t *region; + size_t i; + int ret; assert(dma != NULL); assert(bitmap != NULL); @@ -599,11 +600,32 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, return ERROR_INT(EINVAL); } - memcpy(bitmap, region->dirty_bitmap, size); + for (i = 0; i < (size_t)bitmap_size; i++) { + uint8_t val = region->dirty_bitmap[i]; + uint8_t *outp = (uint8_t *)&bitmap[i]; + + /* + * If no bits are dirty, avoid the atomic exchange. This is obviously + * racy, but it's OK: if we miss a dirty bit being set, we'll catch it + * the next time around. + * + * Otherwise, atomically exchange the dirty bits with zero: as we use + * atomic or in _dma_mark_dirty(), this cannot lose set bits - we might + * miss a bit being set after, but again, we'll catch that next time + * around. + */ + if (val == 0) { + *outp = 0; + } else { + uint8_t zero = 0; + __atomic_exchange(®ion->dirty_bitmap[i], &zero, + outp, __ATOMIC_SEQ_CST); + } + } + #ifdef DEBUG log_dirty_bitmap(dma->vfu_ctx, region, bitmap, size); #endif - memset(region->dirty_bitmap, 0, size); return 0; } @@ -58,6 +58,7 @@ * effectively a no-op. */ +#include <stdio.h> #ifdef DMA_MAP_PROTECTED #undef DMA_MAP_FAST #define DMA_MAP_FAST_IMPL 0 @@ -95,7 +96,7 @@ typedef struct { vfu_dma_info_t info; int fd; // File descriptor to mmap off_t offset; // File offset - char *dirty_bitmap; // Dirty page bitmap + uint8_t *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; typedef struct dma_controller { @@ -140,22 +141,64 @@ _dma_addr_sg_split(const dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t len, dma_sg_t *sg, int max_nr_sgs, int prot); -static void +/* Convert a start address and length to its containing page numbers. */ +static inline void +range_to_pages(size_t start, size_t len, size_t pgsize, + size_t *pgstart, size_t *pgend) +{ + *pgstart = start / pgsize; + *pgend = ROUND_UP(start + len, pgsize) / pgsize; +} + +/* Given a bit position, return the containing byte. */ +static inline size_t +bit_to_u8(size_t val) +{ + return val / (CHAR_BIT); +} + +/* Return a value modulo the bitsize of a uint8_t. */ +static inline size_t +bit_to_u8off(size_t val) +{ + return val % (CHAR_BIT); +} + +static inline void _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region, dma_sg_t *sg) { - size_t i, start, end; + size_t index; + size_t end; + size_t pgstart; + size_t pgend; + size_t i; assert(dma != NULL); assert(region != NULL); assert(sg != NULL); assert(region->dirty_bitmap != NULL); - start = sg->offset / dma->dirty_pgsize; - end = start + (sg->length / dma->dirty_pgsize) + (sg->length % dma->dirty_pgsize != 0) - 1; + range_to_pages(sg->offset, sg->length, dma->dirty_pgsize, + &pgstart, &pgend); + + index = bit_to_u8(pgstart); + end = bit_to_u8(pgend) + !!(bit_to_u8off(pgend)); + + for (i = index; i < end; i++) { + uint8_t bm = ~0; + + /* Mask off any pages in the first u8 that aren't in the range. */ + if (i == index && bit_to_u8off(pgstart) != 0) { + bm &= ~((1 << bit_to_u8off(pgstart)) - 1); + } + + /* Mask off any pages in the last u8 that aren't in the range. */ + if (i == end - 1 && bit_to_u8off(pgend) != 0) { + bm &= ((1 << bit_to_u8off(pgend)) - 1); + } - for (i = start; i <= end; i++) { - region->dirty_bitmap[i / CHAR_BIT] |= 1 << (i % CHAR_BIT); + __atomic_or_fetch(®ion->dirty_bitmap[i], bm, __ATOMIC_SEQ_CST); } } diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 90c4b39..566ece0 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -1310,8 +1310,15 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) case VFIO_USER_DEVICE_RESET: return true; - case VFIO_USER_DIRTY_PAGES: - return true; + case VFIO_USER_DIRTY_PAGES: { + struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; + + if (msg->in.iov.iov_len < sizeof(*dirty_pages)) { + return false; + } + + return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP); + } case VFIO_USER_REGION_WRITE: if (msg->in.iov.iov_len < sizeof(*reg)) { diff --git a/test/py/test_dirty_pages.py b/test/py/test_dirty_pages.py index 6cf87fb..8b4e3dc 100644 --- a/test/py/test_dirty_pages.py +++ b/test/py/test_dirty_pages.py @@ -89,13 +89,13 @@ def test_dirty_pages_setup(): payload = vfio_user_dma_map(argsz=len(vfio_user_dma_map()), flags=(VFIO_USER_F_DMA_REGION_READ | VFIO_USER_F_DMA_REGION_WRITE), - offset=0, addr=0x10000, size=0x10000) + offset=0, addr=0x10000, size=0x20000) msg(ctx, sock, VFIO_USER_DMA_MAP, payload, fds=[f.fileno()]) payload = vfio_user_dma_map(argsz=len(vfio_user_dma_map()), flags=(VFIO_USER_F_DMA_REGION_READ | VFIO_USER_F_DMA_REGION_WRITE), - offset=0, addr=0x20000, size=0x10000) + offset=0, addr=0x40000, size=0x10000) msg(ctx, sock, VFIO_USER_DMA_MAP, payload) @@ -247,7 +247,7 @@ def test_get_dirty_page_bitmap_unmapped(): dirty_pages = vfio_user_dirty_pages(argsz=argsz, flags=VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) bitmap = vfio_user_bitmap(pgsize=0x1000, size=8) - br = vfio_user_bitmap_range(iova=0x20000, size=0x10000, bitmap=bitmap) + br = vfio_user_bitmap_range(iova=0x40000, size=0x10000, bitmap=bitmap) payload = bytes(dirty_pages) + bytes(br) @@ -283,13 +283,7 @@ def test_dirty_pages_get_unmodified(): assert br.bitmap.size == 8 -def get_dirty_page_bitmap_result(resp): - _, resp = vfio_user_dirty_pages.pop_from_buffer(resp) - _, resp = vfio_user_bitmap_range.pop_from_buffer(resp) - return struct.unpack("Q", resp)[0] - - -def send_dirty_page_bitmap(busy=False): +def get_dirty_page_bitmap(): argsz = len(vfio_user_dirty_pages()) + len(vfio_user_bitmap_range()) + 8 dirty_pages = vfio_user_dirty_pages(argsz=argsz, @@ -299,18 +293,33 @@ def send_dirty_page_bitmap(busy=False): payload = bytes(dirty_pages) + bytes(br) - return msg(ctx, sock, VFIO_USER_DIRTY_PAGES, payload, busy=busy) + result = msg(ctx, sock, VFIO_USER_DIRTY_PAGES, payload) + + _, result = vfio_user_dirty_pages.pop_from_buffer(result) + _, result = vfio_user_bitmap_range.pop_from_buffer(result) + assert(len(result) == 8) -def get_dirty_page_bitmap(): - result = send_dirty_page_bitmap() - return get_dirty_page_bitmap_result(result) + return struct.unpack("Q", result)[0] sg3 = None iovec3 = None +def write_to_addr(ctx, addr, size, get_bitmap=True): + """Simulate a write to the given address and size.""" + ret, sg = vfu_addr_to_sgl(ctx, dma_addr=addr, length=size) + assert ret == 1 + iovec = iovec_t() + ret = vfu_sgl_get(ctx, sg, iovec) + assert ret == 0 + vfu_sgl_put(ctx, sg, iovec) + if get_bitmap: + return get_dirty_page_bitmap() + return None + + def test_dirty_pages_get_modified(): ret, sg1 = vfu_addr_to_sgl(ctx, dma_addr=0x10000, length=0x1000) assert ret == 1 @@ -326,13 +335,15 @@ def test_dirty_pages_get_modified(): ret = vfu_sgl_get(ctx, sg2, iovec2) assert ret == 0 + # simple single bitmap entry map ret, sg3 = vfu_addr_to_sgl(ctx, dma_addr=0x12000, length=0x1000) assert ret == 1 iovec3 = iovec_t() ret = vfu_sgl_get(ctx, sg3, iovec3) assert ret == 0 - ret, sg4 = vfu_addr_to_sgl(ctx, dma_addr=0x14000, length=0x4000) + # write that spans bytes in bitmap + ret, sg4 = vfu_addr_to_sgl(ctx, dma_addr=0x16000, length=0x4000) assert ret == 1 iovec4 = iovec_t() ret = vfu_sgl_get(ctx, sg4, iovec4) @@ -340,23 +351,65 @@ def test_dirty_pages_get_modified(): # not put yet, dirty bitmap should be zero bitmap = get_dirty_page_bitmap() - assert bitmap == 0b00000000 + assert bitmap == 0b0000000000000000 # put SGLs, dirty bitmap should be updated vfu_sgl_put(ctx, sg1, iovec1) vfu_sgl_put(ctx, sg4, iovec4) bitmap = get_dirty_page_bitmap() - assert bitmap == 0b11110001 + assert bitmap == 0b0000001111000001 # after another two puts, should just be one dirty page vfu_sgl_put(ctx, sg2, iovec2) vfu_sgl_put(ctx, sg3, iovec3) bitmap = get_dirty_page_bitmap() - assert bitmap == 0b00000100 + assert bitmap == 0b0000000000000100 # and should now be clear bitmap = get_dirty_page_bitmap() - assert bitmap == 0b00000000 + assert bitmap == 0b0000000000000000 + + # + # check various edge cases of bitmap values. + # + + # very first bit + bitmap = write_to_addr(ctx, 0x10000, 0x1000) + assert bitmap == 0b0000000000000001 + + # top bit of first byte + bitmap = write_to_addr(ctx, 0x17000, 0x1000) + assert bitmap == 0b0000000010000000 + + # all bits except top one of first byte + bitmap = write_to_addr(ctx, 0x10000, 0x7000) + assert bitmap == 0b0000000001111111 + + # all bits of first byte + bitmap = write_to_addr(ctx, 0x10000, 0x8000) + assert bitmap == 0b0000000011111111 + + # all bits of first byte plus bottom bit of next + bitmap = write_to_addr(ctx, 0x10000, 0x9000) + assert bitmap == 0b0000000111111111 + + # straddle top/bottom bit + bitmap = write_to_addr(ctx, 0x17000, 0x2000) + assert bitmap == 0b0000000110000000 + + # top bit of second byte + bitmap = write_to_addr(ctx, 0x1f000, 0x1000) + assert bitmap == 0b1000000000000000 + + # top bit of third byte + bitmap = write_to_addr(ctx, 0x27000, 0x1000) + assert bitmap == 0b100000000000000000000000 + + # bits in third and first byte + write_to_addr(ctx, 0x26000, 0x1000, get_bitmap=False) + write_to_addr(ctx, 0x12000, 0x2000, get_bitmap=False) + bitmap = get_dirty_page_bitmap() + assert bitmap == 0b010000000000000000001100 def test_dirty_pages_stop(): @@ -399,17 +452,8 @@ def test_dirty_pages_bitmap_with_quiesce(): assert ret == 0 vfu_sgl_put(ctx, sg1, iovec1) - send_dirty_page_bitmap(busy=True) - - ret = vfu_device_quiesced(ctx, 0) - assert ret == 0 - - # now should be able to get the reply - result = get_reply(sock, expect=0) - bitmap = get_dirty_page_bitmap_result(result) - assert bitmap == 0b00000001 - - quiesce_errno = 0 + bitmap = get_dirty_page_bitmap() + assert bitmap == 0b0000000000000001 def test_dirty_pages_stop_with_quiesce(): |