author    Emanuele Giuseppe Esposito <eesposit@redhat.com>  2022-09-26 05:32:11 -0400
committer Kevin Wolf <kwolf@redhat.com>  2022-10-07 12:11:41 +0200
commit    6f592e5aca1a27fe1c1f661cfe68b35b90850acf
tree      74f0dbf7c423bb4361cf0bb5c008a8b50de7fe6e
parent    2fc3bdc3843d2d8bde54c2be4d4f4cc8a9ffcf50
job.c: enable job lock/unlock and remove AioContext locks
Change the job_{lock/unlock} functions and macros to use job_mutex. Now
that they are no longer nops, remove the AioContext locks to avoid
deadlocks. Therefore:

- when possible, remove the AioContext lock/unlock pair completely
- if the lock is also used by some other function, reduce the locking
  section as much as possible, leaving the job API outside of it
- change AIO_WAIT_WHILE to AIO_WAIT_WHILE_UNLOCKED, since we are not
  using the AioContext lock anymore

The only functions that still need the AioContext lock are:

- the JobDriver callbacks, already documented in job.h
- job_cancel_sync() in replication.c, which is called with the
  AioContext lock taken; but the job now uses AIO_WAIT_WHILE_UNLOCKED,
  so we need to release the lock. Reduce the locking section to cover
  only the callback invocation, and document the functions that take
  the AioContext lock, to avoid taking it twice.

Also remove real_job_{lock/unlock}, as they are replaced by the public
functions.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20220926093214.506243-19-eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
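To illustrate the change described above, here is a minimal caller-side
sketch (not part of this patch; the helper name wait_for_job_done is
made up) of how the now-real job lock combines with
AIO_WAIT_WHILE_UNLOCKED, mirroring the pattern job_finish_sync_locked()
uses after this commit:

#include "qemu/osdep.h"
#include "block/aio-wait.h"
#include "qemu/job.h"

/*
 * Hypothetical example, not from this patch: job_lock()/job_unlock()
 * now really take job_mutex, so the mutex must be dropped before
 * polling, and AIO_WAIT_WHILE_UNLOCKED is used because the caller no
 * longer holds the job's AioContext lock.
 */
static int wait_for_job_done(Job *job)
{
    int ret;

    job_lock();               /* now a real qemu_mutex_lock(&job_mutex) */
    job_ref_locked(job);      /* keep the job alive across the wait */
    job_unlock();

    /* Kick the job and poll its completion without any AioContext lock. */
    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
                            (job_enter(job), !job_is_completed(job)));

    job_lock();
    ret = job->ret;
    job_unref_locked(job);
    job_unlock();
    return ret;
}

The JobDriver callbacks themselves (.free, .prepare, .cancel) remain
the exception: as the hunks below show, they are still invoked with the
AioContext lock held.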
Diffstat (limited to 'job.c')
-rw-r--r--  job.c | 111
1 file changed, 32 insertions(+), 79 deletions(-)
diff --git a/job.c b/job.c
index 3e6f61c..76c6d7f 100644
--- a/job.c
+++ b/job.c
@@ -44,8 +44,6 @@
*
* The second includes functions used by the job drivers and sometimes
* by the core block layer. These delegate the locking to the callee instead.
- *
- * TODO Actually make this true
*/
/*
@@ -99,20 +97,10 @@ struct JobTxn {
void job_lock(void)
{
- /* nop */
-}
-
-void job_unlock(void)
-{
- /* nop */
-}
-
-static void real_job_lock(void)
-{
qemu_mutex_lock(&job_mutex);
}
-static void real_job_unlock(void)
+void job_unlock(void)
{
qemu_mutex_unlock(&job_mutex);
}
@@ -187,7 +175,6 @@ static void job_txn_del_job_locked(Job *job)
/* Called with job_mutex held, but releases it temporarily. */
static int job_txn_apply_locked(Job *job, int fn(Job *))
{
- AioContext *inner_ctx;
Job *other_job, *next;
JobTxn *txn = job->txn;
int rc = 0;
@@ -199,23 +186,14 @@ static int job_txn_apply_locked(Job *job, int fn(Job *))
* break AIO_WAIT_WHILE from within fn.
*/
job_ref_locked(job);
- aio_context_release(job->aio_context);
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
- inner_ctx = other_job->aio_context;
- aio_context_acquire(inner_ctx);
rc = fn(other_job);
- aio_context_release(inner_ctx);
if (rc) {
break;
}
}
- /*
- * Note that job->aio_context might have been changed by calling fn, so we
- * can't use a local variable to cache it.
- */
- aio_context_acquire(job->aio_context);
job_unref_locked(job);
return rc;
}
@@ -503,8 +481,12 @@ void job_unref_locked(Job *job)
assert(!job->txn);
if (job->driver->free) {
+ AioContext *aio_context = job->aio_context;
job_unlock();
+ /* FIXME: aiocontext lock is required because cb calls blk_unref */
+ aio_context_acquire(aio_context);
job->driver->free(job);
+ aio_context_release(aio_context);
job_lock();
}
@@ -583,21 +565,17 @@ void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
return;
}
- real_job_lock();
if (job->busy) {
- real_job_unlock();
return;
}
if (fn && !fn(job)) {
- real_job_unlock();
return;
}
assert(!job->deferred_to_main_loop);
timer_del(&job->sleep_timer);
job->busy = true;
- real_job_unlock();
job_unlock();
aio_co_wake(job->co);
job_lock();
@@ -628,13 +606,11 @@ static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
{
AioContext *next_aio_context;
- real_job_lock();
if (ns != -1) {
timer_mod(&job->sleep_timer, ns);
}
job->busy = false;
job_event_idle_locked(job);
- real_job_unlock();
job_unlock();
qemu_coroutine_yield();
job_lock();
@@ -920,10 +896,14 @@ static void job_clean(Job *job)
}
}
-/* Called with job_mutex held, but releases it temporarily */
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
static int job_finalize_single_locked(Job *job)
{
int job_ret;
+ AioContext *ctx = job->aio_context;
assert(job_is_completed_locked(job));
@@ -932,6 +912,7 @@ static int job_finalize_single_locked(Job *job)
job_ret = job->ret;
job_unlock();
+ aio_context_acquire(ctx);
if (!job_ret) {
job_commit(job);
@@ -940,15 +921,13 @@ static int job_finalize_single_locked(Job *job)
}
job_clean(job);
- job_lock();
-
if (job->cb) {
- job_ret = job->ret;
- job_unlock();
job->cb(job->opaque, job_ret);
- job_lock();
}
+ aio_context_release(ctx);
+ job_lock();
+
/* Emit events only if we actually started */
if (job_started_locked(job)) {
if (job_is_cancelled_locked(job)) {
@@ -963,13 +942,19 @@ static int job_finalize_single_locked(Job *job)
return 0;
}
-/* Called with job_mutex held, but releases it temporarily */
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
static void job_cancel_async_locked(Job *job, bool force)
{
+ AioContext *ctx = job->aio_context;
GLOBAL_STATE_CODE();
if (job->driver->cancel) {
job_unlock();
+ aio_context_acquire(ctx);
force = job->driver->cancel(job, force);
+ aio_context_release(ctx);
job_lock();
} else {
/* No .cancel() means the job will behave as if force-cancelled */
@@ -1002,10 +987,12 @@ static void job_cancel_async_locked(Job *job, bool force)
}
}
-/* Called with job_mutex held, but releases it temporarily. */
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
static void job_completed_txn_abort_locked(Job *job)
{
- AioContext *ctx;
JobTxn *txn = job->txn;
Job *other_job;
@@ -1018,54 +1005,31 @@ static void job_completed_txn_abort_locked(Job *job)
txn->aborting = true;
job_txn_ref_locked(txn);
- /*
- * We can only hold the single job's AioContext lock while calling
- * job_finalize_single() because the finalization callbacks can involve
- * calls of AIO_WAIT_WHILE(), which could deadlock otherwise.
- * Note that the job's AioContext may change when it is finalized.
- */
job_ref_locked(job);
- aio_context_release(job->aio_context);
/* Other jobs are effectively cancelled by us, set the status for
* them; this job, however, may or may not be cancelled, depending
* on the caller, so leave it. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (other_job != job) {
- ctx = other_job->aio_context;
- aio_context_acquire(ctx);
/*
* This is a transaction: If one job failed, no result will matter.
* Therefore, pass force=true to terminate all other jobs as quickly
* as possible.
*/
job_cancel_async_locked(other_job, true);
- aio_context_release(ctx);
}
}
while (!QLIST_EMPTY(&txn->jobs)) {
other_job = QLIST_FIRST(&txn->jobs);
- /*
- * The job's AioContext may change, so store it in @ctx so we
- * release the same context that we have acquired before.
- */
- ctx = other_job->aio_context;
- aio_context_acquire(ctx);
if (!job_is_completed_locked(other_job)) {
assert(job_cancel_requested_locked(other_job));
job_finish_sync_locked(other_job, NULL, NULL);
}
job_finalize_single_locked(other_job);
- aio_context_release(ctx);
}
- /*
- * Use job_ref()/job_unref() so we can read the AioContext here
- * even if the job went away during job_finalize_single().
- */
- aio_context_acquire(job->aio_context);
job_unref_locked(job);
-
job_txn_unref_locked(txn);
}
@@ -1073,15 +1037,20 @@ static void job_completed_txn_abort_locked(Job *job)
static int job_prepare_locked(Job *job)
{
int ret;
+ AioContext *ctx = job->aio_context;
GLOBAL_STATE_CODE();
+
if (job->ret == 0 && job->driver->prepare) {
job_unlock();
+ aio_context_acquire(ctx);
ret = job->driver->prepare(job);
+ aio_context_release(ctx);
job_lock();
job->ret = ret;
job_update_rc_locked(job);
}
+
return job->ret;
}
@@ -1186,11 +1155,8 @@ static void job_completed_locked(Job *job)
static void job_exit(void *opaque)
{
Job *job = (Job *)opaque;
- AioContext *ctx;
JOB_LOCK_GUARD();
-
job_ref_locked(job);
- aio_context_acquire(job->aio_context);
/* This is a lie, we're not quiescent, but still doing the completion
* callbacks. However, completion callbacks tend to involve operations that
@@ -1200,16 +1166,7 @@ static void job_exit(void *opaque)
job_event_idle_locked(job);
job_completed_locked(job);
-
- /*
- * Note that calling job_completed can move the job to a different
- * aio_context, so we cannot cache from above. job_txn_apply takes care of
- * acquiring the new lock, and we ref/unref to avoid job_completed freeing
- * the job underneath us.
- */
- ctx = job->aio_context;
job_unref_locked(job);
- aio_context_release(ctx);
}
/**
@@ -1337,14 +1294,10 @@ int job_cancel_sync(Job *job, bool force)
void job_cancel_sync_all(void)
{
Job *job;
- AioContext *aio_context;
JOB_LOCK_GUARD();
while ((job = job_next_locked(NULL))) {
- aio_context = job->aio_context;
- aio_context_acquire(aio_context);
job_cancel_sync_locked(job, true);
- aio_context_release(aio_context);
}
}
@@ -1404,8 +1357,8 @@ int job_finish_sync_locked(Job *job,
}
job_unlock();
- AIO_WAIT_WHILE(job->aio_context,
- (job_enter(job), !job_is_completed(job)));
+ AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
+ (job_enter(job), !job_is_completed(job)));
job_lock();
ret = (job_is_cancelled_locked(job) && job->ret == 0)