AioContext: fix broken placement of event_notifier_test_and_clear
event_notifier_test_and_clear must be called before processing events.
Otherwise, an aio_poll could "eat" the notification before the main
I/O thread invokes ppoll().  The main I/O thread then never wakes up.
This is an example of what could happen:
   i/o thread       vcpu thread                     worker thread
   ---------------------------------------------------------------------
   lock_iothread
   notify_me = 1
   ...
   unlock_iothread
                                                     bh->scheduled = 1
                                                     event_notifier_set
                    lock_iothread
                    notify_me = 3
                    ppoll
                    notify_me = 1
                    aio_dispatch
                     aio_bh_poll
                      thread_pool_completion_bh
                                                     bh->scheduled = 1
                                                     event_notifier_set
                     node->io_read(node->opaque)
                      event_notifier_test_and_clear
   ppoll
   *** hang ***
"Tracing" with qemu_clock_get_ns shows pretty much the same behavior as
in the previous bug, so there are no new tricks here---just stare more
at the code until it is apparent.
One could also use a formal model, of course.  The included one shows
this with three processes: notifier corresponds to a QEMU thread pool
worker, temporary_waiter to a VCPU thread that invokes aio_poll(),
waiter to the main I/O thread.  I would be happy to say that the
formal model found the bug for me, but actually I wrote it after the
fact.
This patch is a bit of a big hammer.  The next one optimizes it,
with help (this time for real rather than a posteriori :)) from
another, similar formal model.
Reported-by: Richard W. M. Jones <rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Message-id: 1437487673-23740-6-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
			
			
This commit is contained in:
		
							parent
							
								
									eabc977973
								
							
						
					
					
						commit
						21a03d17f2
					
				| @ -276,6 +276,8 @@ bool aio_poll(AioContext *ctx, bool blocking) | ||||
|         aio_context_acquire(ctx); | ||||
|     } | ||||
| 
 | ||||
|     event_notifier_test_and_clear(&ctx->notifier); | ||||
| 
 | ||||
|     /* if we have any readable fds, dispatch event */ | ||||
|     if (ret > 0) { | ||||
|         for (i = 0; i < npfd; i++) { | ||||
|  | ||||
| @ -337,10 +337,11 @@ bool aio_poll(AioContext *ctx, bool blocking) | ||||
|             aio_context_acquire(ctx); | ||||
|         } | ||||
| 
 | ||||
|         if (first && aio_bh_poll(ctx)) { | ||||
|             progress = true; | ||||
|         if (first) { | ||||
|             event_notifier_test_and_clear(&ctx->notifier); | ||||
|             progress |= aio_bh_poll(ctx); | ||||
|             first = false; | ||||
|         } | ||||
|         first = false; | ||||
| 
 | ||||
|         /* if we have any signaled events, dispatch event */ | ||||
|         event = NULL; | ||||
|  | ||||
							
								
								
									
										8
									
								
								async.c
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								async.c
									
									
									
									
									
								
							| @ -203,6 +203,8 @@ aio_ctx_check(GSource *source) | ||||
|     QEMUBH *bh; | ||||
| 
 | ||||
|     atomic_and(&ctx->notify_me, ~1); | ||||
|     event_notifier_test_and_clear(&ctx->notifier); | ||||
| 
 | ||||
|     for (bh = ctx->first_bh; bh; bh = bh->next) { | ||||
|         if (!bh->deleted && bh->scheduled) { | ||||
|             return true; | ||||
| @ -279,6 +281,10 @@ static void aio_rfifolock_cb(void *opaque) | ||||
|     aio_notify(opaque); | ||||
| } | ||||
| 
 | ||||
| static void event_notifier_dummy_cb(EventNotifier *e) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| AioContext *aio_context_new(Error **errp) | ||||
| { | ||||
|     int ret; | ||||
| @ -293,7 +299,7 @@ AioContext *aio_context_new(Error **errp) | ||||
|     g_source_set_can_recurse(&ctx->source, true); | ||||
|     aio_set_event_notifier(ctx, &ctx->notifier, | ||||
|                            (EventNotifierHandler *) | ||||
|                            event_notifier_test_and_clear); | ||||
|                            event_notifier_dummy_cb); | ||||
|     ctx->thread_pool = NULL; | ||||
|     qemu_mutex_init(&ctx->bh_lock); | ||||
|     rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); | ||||
|  | ||||
							
								
								
									
										140
									
								
								docs/aio_notify_bug.promela
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								docs/aio_notify_bug.promela
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,140 @@ | ||||
| /* | ||||
|  * This model describes a bug in aio_notify.  If ctx->notifier is | ||||
|  * cleared too late, a wakeup could be lost. | ||||
|  * | ||||
|  * Author: Paolo Bonzini <pbonzini@redhat.com> | ||||
|  * | ||||
|  * This file is in the public domain.  If you really want a license, | ||||
|  * the WTFPL will do. | ||||
|  * | ||||
|  * To verify the buggy version: | ||||
|  *     spin -a -DBUG docs/aio_notify_bug.promela | ||||
|  *     gcc -O2 pan.c | ||||
|  *     ./a.out -a -f | ||||
|  * | ||||
|  * To verify the fixed version: | ||||
|  *     spin -a docs/aio_notify_bug.promela | ||||
|  *     gcc -O2 pan.c | ||||
|  *     ./a.out -a -f | ||||
|  * | ||||
|  * Add -DCHECK_REQ to test an alternative invariant and the | ||||
|  * "notify_me" optimization. | ||||
|  */ | ||||
| 
 | ||||
| int notify_me; | ||||
| bool event; | ||||
| bool req; | ||||
| bool notifier_done; | ||||
| 
 | ||||
| #ifdef CHECK_REQ | ||||
| #define USE_NOTIFY_ME 1 | ||||
| #else | ||||
| #define USE_NOTIFY_ME 0 | ||||
| #endif | ||||
| 
 | ||||
| active proctype notifier() | ||||
| { | ||||
|     do | ||||
|         :: true -> { | ||||
|             req = 1; | ||||
|             if | ||||
|                :: !USE_NOTIFY_ME || notify_me -> event = 1; | ||||
|                :: else -> skip; | ||||
|             fi | ||||
|         } | ||||
|         :: true -> break; | ||||
|     od; | ||||
|     notifier_done = 1; | ||||
| } | ||||
| 
 | ||||
| #ifdef BUG | ||||
| #define AIO_POLL                                                    \ | ||||
|     notify_me++;                                                    \ | ||||
|     if                                                              \ | ||||
|         :: !req -> {                                                \ | ||||
|             if                                                      \ | ||||
|                 :: event -> skip;                                   \ | ||||
|             fi;                                                     \ | ||||
|         }                                                           \ | ||||
|         :: else -> skip;                                            \ | ||||
|     fi;                                                             \ | ||||
|     notify_me--;                                                    \ | ||||
|                                                                     \ | ||||
|     req = 0;                                                        \ | ||||
|     event = 0; | ||||
| #else | ||||
| #define AIO_POLL                                                    \ | ||||
|     notify_me++;                                                    \ | ||||
|     if                                                              \ | ||||
|         :: !req -> {                                                \ | ||||
|             if                                                      \ | ||||
|                 :: event -> skip;                                   \ | ||||
|             fi;                                                     \ | ||||
|         }                                                           \ | ||||
|         :: else -> skip;                                            \ | ||||
|     fi;                                                             \ | ||||
|     notify_me--;                                                    \ | ||||
|                                                                     \ | ||||
|     event = 0;                                                      \ | ||||
|     req = 0; | ||||
| #endif | ||||
| 
 | ||||
| active proctype waiter() | ||||
| { | ||||
|     do | ||||
|        :: true -> AIO_POLL; | ||||
|     od; | ||||
| } | ||||
| 
 | ||||
| /* Same as waiter(), but disappears after a while.  */ | ||||
| active proctype temporary_waiter() | ||||
| { | ||||
|     do | ||||
|        :: true -> AIO_POLL; | ||||
|        :: true -> break; | ||||
|     od; | ||||
| } | ||||
| 
 | ||||
| #ifdef CHECK_REQ | ||||
| never { | ||||
|     do | ||||
|         :: req -> goto accept_if_req_not_eventually_false; | ||||
|         :: true -> skip; | ||||
|     od; | ||||
| 
 | ||||
| accept_if_req_not_eventually_false: | ||||
|     if | ||||
|         :: req -> goto accept_if_req_not_eventually_false; | ||||
|     fi; | ||||
|     assert(0); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| /* There must be infinitely many transitions of event as long | ||||
|  * as the notifier does not exit. | ||||
|  * | ||||
|  * If event stayed always true, the waiters would be busy looping. | ||||
|  * If event stayed always false, the waiters would be sleeping | ||||
|  * forever. | ||||
|  */ | ||||
| never { | ||||
|     do | ||||
|         :: !event    -> goto accept_if_event_not_eventually_true; | ||||
|         :: event     -> goto accept_if_event_not_eventually_false; | ||||
|         :: true      -> skip; | ||||
|     od; | ||||
| 
 | ||||
| accept_if_event_not_eventually_true: | ||||
|     if | ||||
|         :: !event && notifier_done  -> do :: true -> skip; od; | ||||
|         :: !event && !notifier_done -> goto accept_if_event_not_eventually_true; | ||||
|     fi; | ||||
|     assert(0); | ||||
| 
 | ||||
| accept_if_event_not_eventually_false: | ||||
|     if | ||||
|         :: event     -> goto accept_if_event_not_eventually_false; | ||||
|     fi; | ||||
|     assert(0); | ||||
| } | ||||
| #endif | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Paolo Bonzini
						Paolo Bonzini