/*
 * QEMU coroutines
 *
 * Copyright IBM, Corp. 2011
 *
 * Authors:
 *  Stefan Hajnoczi
 *  Kevin Wolf
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "qemu/coroutine_int.h"
#include "qemu/coroutine-tls.h"
#include "qemu/cutils.h"
#include "block/aio.h"

enum {
    COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};

/*
 * Coroutine creation and deletion is expensive so a pool of unused coroutines
 * is kept as a cache. When the pool has coroutines available, they are
 * recycled instead of creating new ones from scratch. Coroutines are added to
 * the pool upon termination.
 *
 * The pool is global but each thread maintains a small local pool to avoid
 * global pool contention. Threads fetch and return batches of coroutines from
 * the global pool to maintain their local pool. The local pool holds up to two
 * batches whereas the maximum size of the global pool is controlled by the
 * qemu_coroutine_inc_pool_size() API.
 *
 * .-----------------------------------.
 * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
 * `-----------------------------------'
 *
 * .-------------------.
 * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
 * `-------------------'
 */
typedef struct CoroutinePoolBatch {
    /* Batches are kept in a list */
    QSLIST_ENTRY(CoroutinePoolBatch) next;

    /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
    QSLIST_HEAD(, Coroutine) list;
    unsigned int size;
} CoroutinePoolBatch;

typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;

/* Host operating system limit on number of pooled coroutines */
static unsigned int global_pool_hard_max_size;

static QemuMutex global_pool_lock; /* protects the following variables */
static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
static unsigned int global_pool_size;
static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;

QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);

/* Allocate an empty batch; the caller links it into a pool */
static CoroutinePoolBatch *coroutine_pool_batch_new(void)
{
    CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);

    QSLIST_INIT(&batch->list);
    batch->size = 0;
    return batch;
}

/*
 * Delete every coroutine still held by @batch and free the batch itself.
 * The batch must already be unlinked from any pool.
 */
static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
{
    Coroutine *co;
    Coroutine *tmp;

    QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
        QSLIST_REMOVE_HEAD(&batch->list, pool_next);
        qemu_coroutine_delete(co);
    }
    g_free(batch);
}

/* Notifier callback that frees all batches in this thread's local pool */
static void local_pool_cleanup(Notifier *n, void *value)
{
    CoroutinePool *local_pool = get_ptr_local_pool();
    CoroutinePoolBatch *batch;
    CoroutinePoolBatch *tmp;

    QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
        QSLIST_REMOVE_HEAD(local_pool, next);
        coroutine_pool_batch_delete(batch);
    }
}

/* Ensure the atexit notifier is registered */
static void local_pool_cleanup_init_once(void)
{
    Notifier *notifier = get_ptr_local_pool_cleanup_notifier();

    /* A set ->notify callback doubles as the "already registered" flag */
    if (!notifier->notify) {
        notifier->notify = local_pool_cleanup;
        qemu_thread_atexit_add(notifier);
    }
}

/* Helper to get the next unused coroutine from the local pool */
static Coroutine *coroutine_pool_get_local(void)
{
    CoroutinePool *local_pool = get_ptr_local_pool();
    CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
    Coroutine *co;

    if (unlikely(!batch)) {
        return NULL;
    }

    co = QSLIST_FIRST(&batch->list);
    QSLIST_REMOVE_HEAD(&batch->list, pool_next);
    batch->size--;

    /* Drop drained batches right away so the head batch is never empty */
    if (batch->size == 0) {
        QSLIST_REMOVE_HEAD(local_pool, next);
        coroutine_pool_batch_delete(batch);
    }
    return co;
}

/* Get the next batch from the global pool */
static void coroutine_pool_refill_local(void)
{
    CoroutinePool *local_pool = get_ptr_local_pool();
    CoroutinePoolBatch *batch;

    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
        batch = QSLIST_FIRST(&global_pool);

        if (batch) {
            QSLIST_REMOVE_HEAD(&global_pool, next);
            global_pool_size -= batch->size;
        }
    }

    /* The local pool is only touched after the global lock is released */
    if (batch) {
        QSLIST_INSERT_HEAD(local_pool, batch, next);
        local_pool_cleanup_init_once();
    }
}

/* Add a batch of coroutines to the global pool */
static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
{
    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
        unsigned int max = MIN(global_pool_max_size,
                               global_pool_hard_max_size);

        if (global_pool_size < max) {
            QSLIST_INSERT_HEAD(&global_pool, batch, next);

            /* Overshooting the max pool size is allowed */
            global_pool_size += batch->size;
            return;
        }
    }

    /* The global pool was full, so throw away this batch */
    coroutine_pool_batch_delete(batch);
}

/* Get the next unused coroutine from the pool or return NULL */
static Coroutine *coroutine_pool_get(void)
{
    Coroutine *co;

    co = coroutine_pool_get_local();
    if (!co) {
        /* Local pool is empty: try to pull one batch from the global pool */
        coroutine_pool_refill_local();
        co = coroutine_pool_get_local();
    }
    return co;
}

/* Return a terminated coroutine to this thread's local pool for reuse */
static void coroutine_pool_put(Coroutine *co)
{
    CoroutinePool *local_pool = get_ptr_local_pool();
    CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);

    if (unlikely(!batch)) {
        batch = coroutine_pool_batch_new();
        QSLIST_INSERT_HEAD(local_pool, batch, next);
        local_pool_cleanup_init_once();
    }

    if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
        CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);

        /* Is the local pool full? */
        if (next) {
            /* Hand the full head batch over to the global pool */
            QSLIST_REMOVE_HEAD(local_pool, next);
            coroutine_pool_put_global(batch);
        }

        /* Start a fresh head batch to receive @co */
        batch = coroutine_pool_batch_new();
        QSLIST_INSERT_HEAD(local_pool, batch, next);
    }

    QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
    batch->size++;
}

/*
 * Create a coroutine that will run @entry with @opaque as its argument.
 * A pooled coroutine is reused when the pool is enabled and not empty,
 * otherwise a new one is created.
 */
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
{
    Coroutine *co = NULL;

    if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
        co = coroutine_pool_get();
    }

    if (!co) {
        co = qemu_coroutine_new();
    }

    co->entry = entry;
    co->entry_arg = opaque;
    QSIMPLEQ_INIT(&co->co_queue_wakeup);
    return co;
}

/* Recycle @co into the pool, or delete it when the pool is disabled */
static void coroutine_delete(Coroutine *co)
{
    co->caller = NULL;

    if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
        coroutine_pool_put(co);
    } else {
        qemu_coroutine_delete(co);
    }
}

/*
 * Switch into @co with @ctx recorded as its AioContext, then run every
 * coroutine that gets queued on co_queue_wakeup along the way. Aborts if a
 * coroutine is already scheduled or is re-entered recursively.
 */
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
{
    QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
    Coroutine *from = qemu_coroutine_self();

    QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);

    /* Run co and any queued coroutines */
    while (!QSIMPLEQ_EMPTY(&pending)) {
        Coroutine *to = QSIMPLEQ_FIRST(&pending);
        CoroutineAction ret;

        /*
         * Read to before to->scheduled; pairs with qatomic_cmpxchg in
         * qemu_co_sleep(), aio_co_schedule() etc.
         */
        smp_read_barrier_depends();

        const char *scheduled = qatomic_read(&to->scheduled);

        QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);

        trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);

        /* if the Coroutine has already been scheduled, entering it again will
         * cause us to enter it twice, potentially even after the coroutine has
         * been deleted */
        if (scheduled) {
            fprintf(stderr,
                    "%s: Co-routine was already scheduled in '%s'\n",
                    __func__, scheduled);
            abort();
        }

        if (to->caller) {
            fprintf(stderr, "Co-routine re-entered recursively\n");
            abort();
        }

        to->caller = from;
        to->ctx = ctx;

        /* Store to->ctx before anything that stores to.  Matches
         * barrier in aio_co_wake and qemu_co_mutex_wake.
         */
        smp_wmb();

        ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);

        /* Queued coroutines are run depth-first; previously pending coroutines
         * run after those queued more recently.
         */
        QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);

        switch (ret) {
        case COROUTINE_YIELD:
            break;
        case COROUTINE_TERMINATE:
            assert(!to->locks_held);
            trace_qemu_coroutine_terminate(to);
            coroutine_delete(to);
            break;
        default:
            abort();
        }
    }
}

/* Enter @co using the AioContext from qemu_get_current_aio_context() */
void qemu_coroutine_enter(Coroutine *co)
{
    qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
}

/* Enter @co unless qemu_coroutine_entered() reports it as already entered */
void qemu_coroutine_enter_if_inactive(Coroutine *co)
{
    if (!qemu_coroutine_entered(co)) {
        qemu_coroutine_enter(co);
    }
}

/*
 * Switch from the current coroutine back to its caller. Aborts when the
 * current coroutine has no caller to yield to.
 */
void coroutine_fn qemu_coroutine_yield(void)
{
    Coroutine *self = qemu_coroutine_self();
    Coroutine *to = self->caller;

    trace_qemu_coroutine_yield(self, to);

    if (!to) {
        fprintf(stderr, "Co-routine is yielding to no one\n");
        abort();
    }

    /* Clear the caller so that the coroutine can be entered again */
    self->caller = NULL;
    qemu_coroutine_switch(self, to, COROUTINE_YIELD);
}

/* Return true while @co has a caller set, i.e. between enter and yield */
bool qemu_coroutine_entered(Coroutine *co)
{
    return co->caller;
}

/* Return the AioContext stored in @co by qemu_aio_coroutine_enter() */
AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
{
    return co->ctx;
}

/* Raise the global pool's soft size limit by @additional_pool_size */
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
{
    QEMU_LOCK_GUARD(&global_pool_lock);
    global_pool_max_size += additional_pool_size;
}

/*
 * Lower the global pool's soft size limit by @removing_pool_size.
 * NOTE(review): no underflow check; presumably callers only remove what
 * they previously added with qemu_coroutine_inc_pool_size() - confirm.
 */
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
{
    QEMU_LOCK_GUARD(&global_pool_lock);
    global_pool_max_size -= removing_pool_size;
}

/*
 * Compute the host-imposed upper bound on the number of coroutines in the
 * global pool. Returns UINT_MAX when no limit could be determined.
 */
static unsigned int get_global_pool_hard_max_size(void)
{
#ifdef __linux__
    g_autofree char *contents = NULL;
    int max_map_count;

    /*
     * Linux processes can have up to max_map_count virtual memory areas
     * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
     * must limit the coroutine pool to a safe size to avoid running out of
     * VMAs.
     *
     * The procfs file ends with a newline. qemu_strtoi() is called without an
     * end pointer, so it must consume the whole string; strip trailing
     * whitespace first so that the newline does not make parsing fail.
     */
    if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
                            NULL) &&
        qemu_strtoi(g_strchomp(contents), NULL, 10, &max_map_count) == 0) {
        /*
         * This is an upper bound that avoids exceeding max_map_count. Leave a
         * fixed amount for non-coroutine users like library dependencies,
         * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
         * remaining amount.
         */
        if (max_map_count > 5000) {
            return (max_map_count - 5000) / 2;
        } else {
            /* Disable the global pool but threads still have local pools */
            return 0;
        }
    }
#endif

    return UINT_MAX;
}

/* Runs at load time: set up the global pool lock and the hard size limit */
static void __attribute__((constructor)) qemu_coroutine_init(void)
{
    qemu_mutex_init(&global_pool_lock);
    global_pool_hard_max_size = get_global_pool_hard_max_size();
}