blockjob: reimplement block_job_sleep_ns to allow cancellation
This reverts the effects of commit 4afeffc857 ("blockjob: do not allow
coroutine double entry or entry-after-completion", 2017-11-21)
This fixed the symptom of a bug rather than the root cause. Canceling the
wait on a sleeping blockjob coroutine is generally fine, we just need to
make it work correctly across AioContexts.  To do so, use a QEMUTimer
that calls block_job_enter.  Use a mutex to ensure that block_job_enter
synchronizes correctly with block_job_sleep_ns.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-By: Jeff Cody <jcody@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
			
			
This commit is contained in:
		
							parent
							
								
									356f59b875
								
							
						
					
					
						commit
						fc24908e7d
					
				
							
								
								
									
										63
									
								
								blockjob.c
									
									
									
									
									
								
							
							
						
						
									
										63
									
								
								blockjob.c
									
									
									
									
									
								
							@ -37,6 +37,26 @@
 | 
			
		||||
#include "qemu/timer.h"
 | 
			
		||||
#include "qapi-event.h"
 | 
			
		||||
 | 
			
		||||
/* Right now, this mutex is only needed to synchronize accesses to job->busy
 | 
			
		||||
 * and job->sleep_timer, such as concurrent calls to block_job_do_yield and
 | 
			
		||||
 * block_job_enter. */
 | 
			
		||||
static QemuMutex block_job_mutex;
 | 
			
		||||
 | 
			
		||||
static void block_job_lock(void)
 | 
			
		||||
{
 | 
			
		||||
    qemu_mutex_lock(&block_job_mutex);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void block_job_unlock(void)
 | 
			
		||||
{
 | 
			
		||||
    qemu_mutex_unlock(&block_job_mutex);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __attribute__((__constructor__)) block_job_init(void)
 | 
			
		||||
{
 | 
			
		||||
    qemu_mutex_init(&block_job_mutex);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void block_job_event_cancelled(BlockJob *job);
 | 
			
		||||
static void block_job_event_completed(BlockJob *job, const char *msg);
 | 
			
		||||
 | 
			
		||||
@ -161,6 +181,7 @@ void block_job_unref(BlockJob *job)
 | 
			
		||||
        blk_unref(job->blk);
 | 
			
		||||
        error_free(job->blocker);
 | 
			
		||||
        g_free(job->id);
 | 
			
		||||
        assert(!timer_pending(&job->sleep_timer));
 | 
			
		||||
        g_free(job);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -287,6 +308,13 @@ static void coroutine_fn block_job_co_entry(void *opaque)
 | 
			
		||||
    job->driver->start(job);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void block_job_sleep_timer_cb(void *opaque)
 | 
			
		||||
{
 | 
			
		||||
    BlockJob *job = opaque;
 | 
			
		||||
 | 
			
		||||
    block_job_enter(job);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void block_job_start(BlockJob *job)
 | 
			
		||||
{
 | 
			
		||||
    assert(job && !block_job_started(job) && job->paused &&
 | 
			
		||||
@ -556,7 +584,7 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
 | 
			
		||||
    info->type      = g_strdup(BlockJobType_str(job->driver->job_type));
 | 
			
		||||
    info->device    = g_strdup(job->id);
 | 
			
		||||
    info->len       = job->len;
 | 
			
		||||
    info->busy      = job->busy;
 | 
			
		||||
    info->busy      = atomic_read(&job->busy);
 | 
			
		||||
    info->paused    = job->pause_count > 0;
 | 
			
		||||
    info->offset    = job->offset;
 | 
			
		||||
    info->speed     = job->speed;
 | 
			
		||||
@ -664,6 +692,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 | 
			
		||||
    job->paused        = true;
 | 
			
		||||
    job->pause_count   = 1;
 | 
			
		||||
    job->refcnt        = 1;
 | 
			
		||||
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
 | 
			
		||||
                   QEMU_CLOCK_REALTIME, SCALE_NS,
 | 
			
		||||
                   block_job_sleep_timer_cb, job);
 | 
			
		||||
 | 
			
		||||
    error_setg(&job->blocker, "block device is in use by block job: %s",
 | 
			
		||||
               BlockJobType_str(driver->job_type));
 | 
			
		||||
@ -729,9 +760,20 @@ static bool block_job_should_pause(BlockJob *job)
 | 
			
		||||
    return job->pause_count > 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void block_job_do_yield(BlockJob *job)
 | 
			
		||||
/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 | 
			
		||||
 * Reentering the job coroutine with block_job_enter() before the timer has
 | 
			
		||||
 * expired is allowed and cancels the timer.
 | 
			
		||||
 *
 | 
			
		||||
 * If @ns is (uint64_t) -1, no timer is scheduled and block_job_enter() must be
 | 
			
		||||
 * called explicitly. */
 | 
			
		||||
static void block_job_do_yield(BlockJob *job, uint64_t ns)
 | 
			
		||||
{
 | 
			
		||||
    block_job_lock();
 | 
			
		||||
    if (ns != -1) {
 | 
			
		||||
        timer_mod(&job->sleep_timer, ns);
 | 
			
		||||
    }
 | 
			
		||||
    job->busy = false;
 | 
			
		||||
    block_job_unlock();
 | 
			
		||||
    qemu_coroutine_yield();
 | 
			
		||||
 | 
			
		||||
    /* Set by block_job_enter before re-entering the coroutine.  */
 | 
			
		||||
@ -755,7 +797,7 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
 | 
			
		||||
 | 
			
		||||
    if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
 | 
			
		||||
        job->paused = true;
 | 
			
		||||
        block_job_do_yield(job);
 | 
			
		||||
        block_job_do_yield(job, -1);
 | 
			
		||||
        job->paused = false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -785,11 +827,16 @@ void block_job_enter(BlockJob *job)
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    block_job_lock();
 | 
			
		||||
    if (job->busy) {
 | 
			
		||||
        block_job_unlock();
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    assert(!job->deferred_to_main_loop);
 | 
			
		||||
    timer_del(&job->sleep_timer);
 | 
			
		||||
    job->busy = true;
 | 
			
		||||
    block_job_unlock();
 | 
			
		||||
    aio_co_wake(job->co);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -807,14 +854,8 @@ void block_job_sleep_ns(BlockJob *job, int64_t ns)
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /* We need to leave job->busy set here, because when we have
 | 
			
		||||
     * put a coroutine to 'sleep', we have scheduled it to run in
 | 
			
		||||
     * the future.  We cannot enter that same coroutine again before
 | 
			
		||||
     * it wakes and runs, otherwise we risk double-entry or entry after
 | 
			
		||||
     * completion. */
 | 
			
		||||
    if (!block_job_should_pause(job)) {
 | 
			
		||||
        co_aio_sleep_ns(blk_get_aio_context(job->blk),
 | 
			
		||||
                        QEMU_CLOCK_REALTIME, ns);
 | 
			
		||||
        block_job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    block_job_pause_point(job);
 | 
			
		||||
@ -830,7 +871,7 @@ void block_job_yield(BlockJob *job)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!block_job_should_pause(job)) {
 | 
			
		||||
        block_job_do_yield(job);
 | 
			
		||||
        block_job_do_yield(job, -1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    block_job_pause_point(job);
 | 
			
		||||
 | 
			
		||||
@ -77,7 +77,7 @@ typedef struct BlockJob {
 | 
			
		||||
    /**
 | 
			
		||||
     * Set to false by the job while the coroutine has yielded and may be
 | 
			
		||||
     * re-entered by block_job_enter().  There may still be I/O or event loop
 | 
			
		||||
     * activity pending.
 | 
			
		||||
     * activity pending.  Accessed under block_job_mutex (in blockjob.c).
 | 
			
		||||
     */
 | 
			
		||||
    bool busy;
 | 
			
		||||
 | 
			
		||||
@ -135,6 +135,12 @@ typedef struct BlockJob {
 | 
			
		||||
     */
 | 
			
		||||
    int ret;
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Timer that is used by @block_job_sleep_ns. Accessed under
 | 
			
		||||
     * block_job_mutex (in blockjob.c).
 | 
			
		||||
     */
 | 
			
		||||
    QEMUTimer sleep_timer;
 | 
			
		||||
 | 
			
		||||
    /** Non-NULL if this job is part of a transaction */
 | 
			
		||||
    BlockJobTxn *txn;
 | 
			
		||||
    QLIST_ENTRY(BlockJob) txn_list;
 | 
			
		||||
 | 
			
		||||
@ -142,8 +142,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 | 
			
		||||
 * @ns: How many nanoseconds to stop for.
 | 
			
		||||
 *
 | 
			
		||||
 * Put the job to sleep (assuming that it wasn't canceled) for @ns
 | 
			
		||||
 * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will not interrupt
 | 
			
		||||
 * the wait, so the cancel will not process until the coroutine wakes up.
 | 
			
		||||
 * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will immediately
 | 
			
		||||
 * interrupt the wait.
 | 
			
		||||
 */
 | 
			
		||||
void block_job_sleep_ns(BlockJob *job, int64_t ns);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user