Block layer patches
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJYc5OQAAoJEH8JsnLIjy/W5Z8P+wVaf86agtt5tAnrytkKc3SM
 Ic+XiFQb7iN4xjL85O02FkXijB8AqMzGqgdeSts5r7AFu/VVPTa3+F8YinPuwxZY
 lxikRtCIihcRvkx1zvZhXtErGFif6BsfQY9F+JyYLrxkg9lepM/kHHlmdAmDiBLx
 qL+/CKOkkO5qKsRSqJ+nH33NGhVyJx8NubRbgEiuA0WOcwZ1gCvrAnBlN2dVSl+B
 6NN3BDg3DkNSfD67ZVHoC6RNsd7HaZfL2I1ox/uCdsVj2xm4z+iGOmX9pE0gOeGk
 arHBeJXbN3ybgJPD0X0bWeFAV3KJMC9Ndjh27ZZjtepAbHvdttDxr8ph0NoGTHV+
 CRWYKyObMqQy+1+GpfMNnRHENcSZPlBDTCliKWW0t7JssGEiJZ7Z7kffsmV5r4rU
 RnlvIvQ4PaPpPDzubtbyjcwPqsfQFGvRDiBBqSXEDdpy34ru4HLm9w87qHeQkTeG
 HkLqkbrNo/0v0TJldOwtDOnKo98vgYds7oZ0TjUDHHf9COeJfU5BEC/7AJhU6U+q
 x7hQLw0lWyRnuWKTLxic04T5EQrR5j1EO9PQHa3fk1AIxXf4e7gf5zW++C4DwITT
 z+Ma4UcowLwddwSR0MkGcODYvlPWdakrmF/VlG08ul4l0K+0ReffHtANGdBgkBT9
 3LefuCDPdc7ushYN2+6g
 =wM6S
 -----END PGP SIGNATURE-----
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches
# gpg: Signature made Mon 09 Jan 2017 13:43:44 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream:
  block: Rename raw-{posix,win32} to file-*.c
  block: Rename raw_bsd to raw-format.c
  blkverify: Implement bdrv_co_preadv/pwritev/flush
  blkdebug: Implement bdrv_co_preadv/pwritev/flush
  quorum: Clean up quorum_aio_get()
  quorum: Inline quorum_fifo_aio_cb()
  quorum: Implement .bdrv_co_preadv/pwritev()
  quorum: Avoid bdrv_aio_writev() for rewrites
  quorum: Inline quorum_aio_cb()
  quorum: Do cleanup in caller coroutine
  quorum: Implement .bdrv_co_readv/writev
  quorum: Remove s from quorum_aio_get() arguments
  coroutine: Introduce qemu_coroutine_enter_if_inactive()
  qemu-img: fix in-flight count for qemu-img bench
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
			
			
This commit is contained in:
		
						commit
						dba5c337c8
					
				| @ -1718,9 +1718,9 @@ L: qemu-block@nongnu.org | ||||
| S: Supported | ||||
| F: block/linux-aio.c | ||||
| F: include/block/raw-aio.h | ||||
| F: block/raw-posix.c | ||||
| F: block/raw-win32.c | ||||
| F: block/raw_bsd.c | ||||
| F: block/raw-format.c | ||||
| F: block/file-posix.c | ||||
| F: block/file-win32.c | ||||
| F: block/win32-aio.c | ||||
| 
 | ||||
| qcow2 | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o | ||||
| block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o | ||||
| block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o | ||||
| block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o | ||||
| block-obj-y += qed-check.o | ||||
| @ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o | ||||
| block-obj-y += quorum.o | ||||
| block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o | ||||
| block-obj-y += block-backend.o snapshot.o qapi.o | ||||
| block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o | ||||
| block-obj-$(CONFIG_POSIX) += raw-posix.o | ||||
| block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o | ||||
| block-obj-$(CONFIG_POSIX) += file-posix.o | ||||
| block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o | ||||
| block-obj-y += null.o mirror.o commit.o io.o | ||||
| block-obj-y += throttle-groups.o | ||||
|  | ||||
| @ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq { | ||||
|     QLIST_ENTRY(BlkdebugSuspendedReq) next; | ||||
| } BlkdebugSuspendedReq; | ||||
| 
 | ||||
| static const AIOCBInfo blkdebug_aiocb_info = { | ||||
|     .aiocb_size    = sizeof(BlkdebugAIOCB), | ||||
| }; | ||||
| 
 | ||||
| enum { | ||||
|     ACTION_INJECT_ERROR, | ||||
|     ACTION_SET_STATE, | ||||
| @ -77,7 +73,7 @@ typedef struct BlkdebugRule { | ||||
|             int error; | ||||
|             int immediately; | ||||
|             int once; | ||||
|             int64_t sector; | ||||
|             int64_t offset; | ||||
|         } inject; | ||||
|         struct { | ||||
|             int new_state; | ||||
| @ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) | ||||
|     const char* event_name; | ||||
|     BlkdebugEvent event; | ||||
|     struct BlkdebugRule *rule; | ||||
|     int64_t sector; | ||||
| 
 | ||||
|     /* Find the right event for the rule */ | ||||
|     event_name = qemu_opt_get(opts, "event"); | ||||
| @ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) | ||||
|         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0); | ||||
|         rule->options.inject.immediately = | ||||
|             qemu_opt_get_bool(opts, "immediately", 0); | ||||
|         rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1); | ||||
|         sector = qemu_opt_get_number(opts, "sector", -1); | ||||
|         rule->options.inject.offset = | ||||
|             sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE; | ||||
|         break; | ||||
| 
 | ||||
|     case ACTION_SET_STATE: | ||||
| @ -408,17 +407,14 @@ out: | ||||
| 
 | ||||
| static void error_callback_bh(void *opaque) | ||||
| { | ||||
|     struct BlkdebugAIOCB *acb = opaque; | ||||
|     acb->common.cb(acb->common.opaque, acb->ret); | ||||
|     qemu_aio_unref(acb); | ||||
|     Coroutine *co = opaque; | ||||
|     qemu_coroutine_enter(co); | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *inject_error(BlockDriverState *bs, | ||||
|     BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule) | ||||
| static int inject_error(BlockDriverState *bs, BlkdebugRule *rule) | ||||
| { | ||||
|     BDRVBlkdebugState *s = bs->opaque; | ||||
|     int error = rule->options.inject.error; | ||||
|     struct BlkdebugAIOCB *acb; | ||||
|     bool immediately = rule->options.inject.immediately; | ||||
| 
 | ||||
|     if (rule->options.inject.once) { | ||||
| @ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs, | ||||
|         remove_rule(rule); | ||||
|     } | ||||
| 
 | ||||
|     if (immediately) { | ||||
|         return NULL; | ||||
|     if (!immediately) { | ||||
|         aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, | ||||
|                                 qemu_coroutine_self()); | ||||
|         qemu_coroutine_yield(); | ||||
|     } | ||||
| 
 | ||||
|     acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque); | ||||
|     acb->ret = -error; | ||||
| 
 | ||||
|     aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb); | ||||
| 
 | ||||
|     return &acb->common; | ||||
|     return -error; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs, | ||||
|     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | ||||
|     BlockCompletionFunc *cb, void *opaque) | ||||
| static int coroutine_fn | ||||
| blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||||
|                    QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     BDRVBlkdebugState *s = bs->opaque; | ||||
|     BlkdebugRule *rule = NULL; | ||||
| 
 | ||||
|     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { | ||||
|         if (rule->options.inject.sector == -1 || | ||||
|             (rule->options.inject.sector >= sector_num && | ||||
|              rule->options.inject.sector < sector_num + nb_sectors)) { | ||||
|         uint64_t inject_offset = rule->options.inject.offset; | ||||
| 
 | ||||
|         if (inject_offset == -1 || | ||||
|             (inject_offset >= offset && inject_offset < offset + bytes)) | ||||
|         { | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (rule && rule->options.inject.error) { | ||||
|         return inject_error(bs, cb, opaque, rule); | ||||
|         return inject_error(bs, rule); | ||||
|     } | ||||
| 
 | ||||
|     return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, | ||||
|                           cb, opaque); | ||||
|     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs, | ||||
|     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | ||||
|     BlockCompletionFunc *cb, void *opaque) | ||||
| static int coroutine_fn | ||||
| blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||||
|                     QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     BDRVBlkdebugState *s = bs->opaque; | ||||
|     BlkdebugRule *rule = NULL; | ||||
| 
 | ||||
|     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { | ||||
|         if (rule->options.inject.sector == -1 || | ||||
|             (rule->options.inject.sector >= sector_num && | ||||
|              rule->options.inject.sector < sector_num + nb_sectors)) { | ||||
|         uint64_t inject_offset = rule->options.inject.offset; | ||||
| 
 | ||||
|         if (inject_offset == -1 || | ||||
|             (inject_offset >= offset && inject_offset < offset + bytes)) | ||||
|         { | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (rule && rule->options.inject.error) { | ||||
|         return inject_error(bs, cb, opaque, rule); | ||||
|         return inject_error(bs, rule); | ||||
|     } | ||||
| 
 | ||||
|     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, | ||||
|                            cb, opaque); | ||||
|     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs, | ||||
|     BlockCompletionFunc *cb, void *opaque) | ||||
| static int blkdebug_co_flush(BlockDriverState *bs) | ||||
| { | ||||
|     BDRVBlkdebugState *s = bs->opaque; | ||||
|     BlkdebugRule *rule = NULL; | ||||
| 
 | ||||
|     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { | ||||
|         if (rule->options.inject.sector == -1) { | ||||
|         if (rule->options.inject.offset == -1) { | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (rule && rule->options.inject.error) { | ||||
|         return inject_error(bs, cb, opaque, rule); | ||||
|         return inject_error(bs, rule); | ||||
|     } | ||||
| 
 | ||||
|     return bdrv_aio_flush(bs->file->bs, cb, opaque); | ||||
|     return bdrv_co_flush(bs->file->bs); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = { | ||||
|     .bdrv_refresh_filename  = blkdebug_refresh_filename, | ||||
|     .bdrv_refresh_limits    = blkdebug_refresh_limits, | ||||
| 
 | ||||
|     .bdrv_aio_readv         = blkdebug_aio_readv, | ||||
|     .bdrv_aio_writev        = blkdebug_aio_writev, | ||||
|     .bdrv_aio_flush         = blkdebug_aio_flush, | ||||
|     .bdrv_co_preadv         = blkdebug_co_preadv, | ||||
|     .bdrv_co_pwritev        = blkdebug_co_pwritev, | ||||
|     .bdrv_co_flush_to_disk  = blkdebug_co_flush, | ||||
| 
 | ||||
|     .bdrv_debug_event           = blkdebug_debug_event, | ||||
|     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint, | ||||
|  | ||||
| @ -19,38 +19,36 @@ typedef struct { | ||||
|     BdrvChild *test_file; | ||||
| } BDRVBlkverifyState; | ||||
| 
 | ||||
| typedef struct BlkverifyAIOCB BlkverifyAIOCB; | ||||
| struct BlkverifyAIOCB { | ||||
|     BlockAIOCB common; | ||||
| typedef struct BlkverifyRequest { | ||||
|     Coroutine *co; | ||||
|     BlockDriverState *bs; | ||||
| 
 | ||||
|     /* Request metadata */ | ||||
|     bool is_write; | ||||
|     int64_t sector_num; | ||||
|     int nb_sectors; | ||||
|     uint64_t offset; | ||||
|     uint64_t bytes; | ||||
|     int flags; | ||||
| 
 | ||||
|     int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *, | ||||
|                       BdrvRequestFlags); | ||||
| 
 | ||||
|     int ret;                    /* test image result */ | ||||
|     int raw_ret;                /* raw image result */ | ||||
| 
 | ||||
|     int ret;                    /* first completed request's result */ | ||||
|     unsigned int done;          /* completion counter */ | ||||
| 
 | ||||
|     QEMUIOVector *qiov;         /* user I/O vector */ | ||||
|     QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */ | ||||
|     void *buf;                  /* buffer for raw file I/O */ | ||||
|     QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */ | ||||
| } BlkverifyRequest; | ||||
| 
 | ||||
|     void (*verify)(BlkverifyAIOCB *acb); | ||||
| }; | ||||
| 
 | ||||
| static const AIOCBInfo blkverify_aiocb_info = { | ||||
|     .aiocb_size         = sizeof(BlkverifyAIOCB), | ||||
| }; | ||||
| 
 | ||||
| static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb, | ||||
| static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r, | ||||
|                                              const char *fmt, ...) | ||||
| { | ||||
|     va_list ap; | ||||
| 
 | ||||
|     va_start(ap, fmt); | ||||
|     fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ", | ||||
|             acb->is_write ? "write" : "read", acb->sector_num, | ||||
|             acb->nb_sectors); | ||||
|     fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ", | ||||
|             r->is_write ? "write" : "read", r->offset, r->bytes); | ||||
|     vfprintf(stderr, fmt, ap); | ||||
|     fprintf(stderr, "\n"); | ||||
|     va_end(ap); | ||||
| @ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs) | ||||
|     return bdrv_getlength(s->test_file->bs); | ||||
| } | ||||
| 
 | ||||
| static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, | ||||
|                                          int64_t sector_num, QEMUIOVector *qiov, | ||||
|                                          int nb_sectors, | ||||
|                                          BlockCompletionFunc *cb, | ||||
|                                          void *opaque) | ||||
| static void coroutine_fn blkverify_do_test_req(void *opaque) | ||||
| { | ||||
|     BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque); | ||||
|     BlkverifyRequest *r = opaque; | ||||
|     BDRVBlkverifyState *s = r->bs->opaque; | ||||
| 
 | ||||
|     acb->is_write = is_write; | ||||
|     acb->sector_num = sector_num; | ||||
|     acb->nb_sectors = nb_sectors; | ||||
|     acb->ret = -EINPROGRESS; | ||||
|     acb->done = 0; | ||||
|     acb->qiov = qiov; | ||||
|     acb->buf = NULL; | ||||
|     acb->verify = NULL; | ||||
|     return acb; | ||||
|     r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov, | ||||
|                            r->flags); | ||||
|     r->done++; | ||||
|     qemu_coroutine_enter_if_inactive(r->co); | ||||
| } | ||||
| 
 | ||||
| static void blkverify_aio_bh(void *opaque) | ||||
| static void coroutine_fn blkverify_do_raw_req(void *opaque) | ||||
| { | ||||
|     BlkverifyAIOCB *acb = opaque; | ||||
|     BlkverifyRequest *r = opaque; | ||||
| 
 | ||||
|     if (acb->buf) { | ||||
|         qemu_iovec_destroy(&acb->raw_qiov); | ||||
|         qemu_vfree(acb->buf); | ||||
|     r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov, | ||||
|                                r->flags); | ||||
|     r->done++; | ||||
|     qemu_coroutine_enter_if_inactive(r->co); | ||||
| } | ||||
| 
 | ||||
| static int coroutine_fn | ||||
| blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset, | ||||
|                   uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov, | ||||
|                   int flags, bool is_write) | ||||
| { | ||||
|     Coroutine *co_a, *co_b; | ||||
| 
 | ||||
|     *r = (BlkverifyRequest) { | ||||
|         .co         = qemu_coroutine_self(), | ||||
|         .bs         = bs, | ||||
|         .offset     = offset, | ||||
|         .bytes      = bytes, | ||||
|         .qiov       = qiov, | ||||
|         .raw_qiov   = raw_qiov, | ||||
|         .flags      = flags, | ||||
|         .is_write   = is_write, | ||||
|         .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv, | ||||
|     }; | ||||
| 
 | ||||
|     co_a = qemu_coroutine_create(blkverify_do_test_req, r); | ||||
|     co_b = qemu_coroutine_create(blkverify_do_raw_req, r); | ||||
| 
 | ||||
|     qemu_coroutine_enter(co_a); | ||||
|     qemu_coroutine_enter(co_b); | ||||
| 
 | ||||
|     while (r->done < 2) { | ||||
|         qemu_coroutine_yield(); | ||||
|     } | ||||
|     acb->common.cb(acb->common.opaque, acb->ret); | ||||
|     qemu_aio_unref(acb); | ||||
| } | ||||
| 
 | ||||
| static void blkverify_aio_cb(void *opaque, int ret) | ||||
| { | ||||
|     BlkverifyAIOCB *acb = opaque; | ||||
| 
 | ||||
|     switch (++acb->done) { | ||||
|     case 1: | ||||
|         acb->ret = ret; | ||||
|         break; | ||||
| 
 | ||||
|     case 2: | ||||
|         if (acb->ret != ret) { | ||||
|             blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret); | ||||
|         } | ||||
| 
 | ||||
|         if (acb->verify) { | ||||
|             acb->verify(acb); | ||||
|         } | ||||
| 
 | ||||
|         aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||||
|                                 blkverify_aio_bh, acb); | ||||
|         break; | ||||
|     if (r->ret != r->raw_ret) { | ||||
|         blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret); | ||||
|     } | ||||
| 
 | ||||
|     return r->ret; | ||||
| } | ||||
| 
 | ||||
| static void blkverify_verify_readv(BlkverifyAIOCB *acb) | ||||
| static int coroutine_fn | ||||
| blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||||
|                     QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov); | ||||
|     if (offset != -1) { | ||||
|         blkverify_err(acb, "contents mismatch in sector %" PRId64, | ||||
|                       acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE)); | ||||
|     BlkverifyRequest r; | ||||
|     QEMUIOVector raw_qiov; | ||||
|     void *buf; | ||||
|     ssize_t cmp_offset; | ||||
|     int ret; | ||||
| 
 | ||||
|     buf = qemu_blockalign(bs->file->bs, qiov->size); | ||||
|     qemu_iovec_init(&raw_qiov, qiov->niov); | ||||
|     qemu_iovec_clone(&raw_qiov, qiov, buf); | ||||
| 
 | ||||
|     ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags, | ||||
|                             false); | ||||
| 
 | ||||
|     cmp_offset = qemu_iovec_compare(qiov, &raw_qiov); | ||||
|     if (cmp_offset != -1) { | ||||
|         blkverify_err(&r, "contents mismatch at offset %" PRId64, | ||||
|                       offset + cmp_offset); | ||||
|     } | ||||
| 
 | ||||
|     qemu_iovec_destroy(&raw_qiov); | ||||
|     qemu_vfree(buf); | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs, | ||||
|         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | ||||
|         BlockCompletionFunc *cb, void *opaque) | ||||
| static int coroutine_fn | ||||
| blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||||
|                      QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     BDRVBlkverifyState *s = bs->opaque; | ||||
|     BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov, | ||||
|                                             nb_sectors, cb, opaque); | ||||
| 
 | ||||
|     acb->verify = blkverify_verify_readv; | ||||
|     acb->buf = qemu_blockalign(bs->file->bs, qiov->size); | ||||
|     qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov); | ||||
|     qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf); | ||||
| 
 | ||||
|     bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors, | ||||
|                    blkverify_aio_cb, acb); | ||||
|     bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors, | ||||
|                    blkverify_aio_cb, acb); | ||||
|     return &acb->common; | ||||
|     BlkverifyRequest r; | ||||
|     return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs, | ||||
|         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | ||||
|         BlockCompletionFunc *cb, void *opaque) | ||||
| { | ||||
|     BDRVBlkverifyState *s = bs->opaque; | ||||
|     BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov, | ||||
|                                             nb_sectors, cb, opaque); | ||||
| 
 | ||||
|     bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors, | ||||
|                     blkverify_aio_cb, acb); | ||||
|     bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, | ||||
|                     blkverify_aio_cb, acb); | ||||
|     return &acb->common; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs, | ||||
|                                        BlockCompletionFunc *cb, | ||||
|                                        void *opaque) | ||||
| static int blkverify_co_flush(BlockDriverState *bs) | ||||
| { | ||||
|     BDRVBlkverifyState *s = bs->opaque; | ||||
| 
 | ||||
|     /* Only flush test file, the raw file is not important */ | ||||
|     return bdrv_aio_flush(s->test_file->bs, cb, opaque); | ||||
|     return bdrv_co_flush(s->test_file->bs); | ||||
| } | ||||
| 
 | ||||
| static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, | ||||
| @ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = { | ||||
|     .bdrv_getlength                   = blkverify_getlength, | ||||
|     .bdrv_refresh_filename            = blkverify_refresh_filename, | ||||
| 
 | ||||
|     .bdrv_aio_readv                   = blkverify_aio_readv, | ||||
|     .bdrv_aio_writev                  = blkverify_aio_writev, | ||||
|     .bdrv_aio_flush                   = blkverify_aio_flush, | ||||
|     .bdrv_co_preadv                   = blkverify_co_preadv, | ||||
|     .bdrv_co_pwritev                  = blkverify_co_pwritev, | ||||
|     .bdrv_co_flush                    = blkverify_co_flush, | ||||
| 
 | ||||
|     .is_filter                        = true, | ||||
|     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, | ||||
|  | ||||
| @ -1253,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs) | ||||
|  * If @start is in a trailing hole or beyond EOF, return -ENXIO. | ||||
|  * If we can't find out, return a negative errno other than -ENXIO. | ||||
|  * | ||||
|  * (Shamefully copied from raw-posix.c, only minuscule adaptations.) | ||||
|  * (Shamefully copied from file-posix.c, only minuscule adaptations.) | ||||
|  */ | ||||
| static int find_allocation(BlockDriverState *bs, off_t start, | ||||
|                            off_t *data, off_t *hole) | ||||
| @ -1349,7 +1349,7 @@ exit: | ||||
|  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes | ||||
|  * beyond the end of the disk image it will be clamped. | ||||
|  * | ||||
|  * (Based on raw_co_get_block_status() from raw-posix.c.) | ||||
|  * (Based on raw_co_get_block_status() from file-posix.c.) | ||||
|  */ | ||||
| static int64_t coroutine_fn qemu_gluster_co_get_block_status( | ||||
|         BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, | ||||
|  | ||||
							
								
								
									
										410
									
								
								block/quorum.c
									
									
									
									
									
								
							
							
						
						
									
										410
									
								
								block/quorum.c
									
									
									
									
									
								
							| @ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB; | ||||
|  * $children_count QuorumChildRequest. | ||||
|  */ | ||||
| typedef struct QuorumChildRequest { | ||||
|     BlockAIOCB *aiocb; | ||||
|     BlockDriverState *bs; | ||||
|     QEMUIOVector qiov; | ||||
|     uint8_t *buf; | ||||
|     int ret; | ||||
| @ -110,11 +110,12 @@ typedef struct QuorumChildRequest { | ||||
|  * used to do operations on each child and track overall progress. | ||||
|  */ | ||||
| struct QuorumAIOCB { | ||||
|     BlockAIOCB common; | ||||
|     BlockDriverState *bs; | ||||
|     Coroutine *co; | ||||
| 
 | ||||
|     /* Request metadata */ | ||||
|     uint64_t sector_num; | ||||
|     int nb_sectors; | ||||
|     uint64_t offset; | ||||
|     uint64_t bytes; | ||||
| 
 | ||||
|     QEMUIOVector *qiov;         /* calling IOV */ | ||||
| 
 | ||||
| @ -133,32 +134,15 @@ struct QuorumAIOCB { | ||||
|     int children_read;          /* how many children have been read from */ | ||||
| }; | ||||
| 
 | ||||
| static bool quorum_vote(QuorumAIOCB *acb); | ||||
| 
 | ||||
| static void quorum_aio_cancel(BlockAIOCB *blockacb) | ||||
| { | ||||
|     QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common); | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     int i; | ||||
| 
 | ||||
|     /* cancel all callbacks */ | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
|         if (acb->qcrs[i].aiocb) { | ||||
|             bdrv_aio_cancel_async(acb->qcrs[i].aiocb); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static AIOCBInfo quorum_aiocb_info = { | ||||
|     .aiocb_size         = sizeof(QuorumAIOCB), | ||||
|     .cancel_async       = quorum_aio_cancel, | ||||
| }; | ||||
| typedef struct QuorumCo { | ||||
|     QuorumAIOCB *acb; | ||||
|     int idx; | ||||
| } QuorumCo; | ||||
| 
 | ||||
| static void quorum_aio_finalize(QuorumAIOCB *acb) | ||||
| { | ||||
|     acb->common.cb(acb->common.opaque, acb->vote_ret); | ||||
|     g_free(acb->qcrs); | ||||
|     qemu_aio_unref(acb); | ||||
|     g_free(acb); | ||||
| } | ||||
| 
 | ||||
| static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) | ||||
| @ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) | ||||
|     return a->l == b->l; | ||||
| } | ||||
| 
 | ||||
| static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, | ||||
|                                    BlockDriverState *bs, | ||||
| static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, | ||||
|                                    QEMUIOVector *qiov, | ||||
|                                    uint64_t sector_num, | ||||
|                                    int nb_sectors, | ||||
|                                    BlockCompletionFunc *cb, | ||||
|                                    void *opaque) | ||||
|                                    uint64_t offset, | ||||
|                                    uint64_t bytes) | ||||
| { | ||||
|     QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque); | ||||
|     BDRVQuorumState *s = bs->opaque; | ||||
|     QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); | ||||
|     int i; | ||||
| 
 | ||||
|     acb->common.bs->opaque = s; | ||||
|     acb->sector_num = sector_num; | ||||
|     acb->nb_sectors = nb_sectors; | ||||
|     acb->qiov = qiov; | ||||
|     acb->qcrs = g_new0(QuorumChildRequest, s->num_children); | ||||
|     acb->count = 0; | ||||
|     acb->success_count = 0; | ||||
|     acb->rewrite_count = 0; | ||||
|     acb->votes.compare = quorum_sha256_compare; | ||||
|     QLIST_INIT(&acb->votes.vote_list); | ||||
|     acb->is_read = false; | ||||
|     acb->vote_ret = 0; | ||||
|     *acb = (QuorumAIOCB) { | ||||
|         .co                 = qemu_coroutine_self(), | ||||
|         .bs                 = bs, | ||||
|         .offset             = offset, | ||||
|         .bytes              = bytes, | ||||
|         .qiov               = qiov, | ||||
|         .votes.compare      = quorum_sha256_compare, | ||||
|         .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list), | ||||
|     }; | ||||
| 
 | ||||
|     acb->qcrs = g_new0(QuorumChildRequest, s->num_children); | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
|         acb->qcrs[i].buf = NULL; | ||||
|         acb->qcrs[i].ret = 0; | ||||
| @ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, | ||||
|     return acb; | ||||
| } | ||||
| 
 | ||||
| static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, | ||||
|                               int nb_sectors, char *node_name, int ret) | ||||
| static void quorum_report_bad(QuorumOpType type, uint64_t offset, | ||||
|                               uint64_t bytes, char *node_name, int ret) | ||||
| { | ||||
|     const char *msg = NULL; | ||||
|     int64_t start_sector = offset / BDRV_SECTOR_SIZE; | ||||
|     int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); | ||||
| 
 | ||||
|     if (ret < 0) { | ||||
|         msg = strerror(-ret); | ||||
|     } | ||||
| 
 | ||||
|     qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, | ||||
|                                       sector_num, nb_sectors, &error_abort); | ||||
|     qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector, | ||||
|                                       end_sector - start_sector, &error_abort); | ||||
| } | ||||
| 
 | ||||
| static void quorum_report_failure(QuorumAIOCB *acb) | ||||
| { | ||||
|     const char *reference = bdrv_get_device_or_node_name(acb->common.bs); | ||||
|     qapi_event_send_quorum_failure(reference, acb->sector_num, | ||||
|                                    acb->nb_sectors, &error_abort); | ||||
|     const char *reference = bdrv_get_device_or_node_name(acb->bs); | ||||
|     int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; | ||||
|     int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes, | ||||
|                                       BDRV_SECTOR_SIZE); | ||||
| 
 | ||||
|     qapi_event_send_quorum_failure(reference, start_sector, | ||||
|                                    end_sector - start_sector, &error_abort); | ||||
| } | ||||
| 
 | ||||
| static int quorum_vote_error(QuorumAIOCB *acb); | ||||
| 
 | ||||
| static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
| 
 | ||||
|     if (acb->success_count < s->threshold) { | ||||
|         acb->vote_ret = quorum_vote_error(acb); | ||||
| @ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| static void quorum_rewrite_aio_cb(void *opaque, int ret) | ||||
| { | ||||
|     QuorumAIOCB *acb = opaque; | ||||
| 
 | ||||
|     /* one less rewrite to do */ | ||||
|     acb->rewrite_count--; | ||||
| 
 | ||||
|     /* wait until all rewrite callbacks have completed */ | ||||
|     if (acb->rewrite_count) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     quorum_aio_finalize(acb); | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb); | ||||
| static int read_fifo_child(QuorumAIOCB *acb); | ||||
| 
 | ||||
| static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) | ||||
| { | ||||
| @ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) | ||||
| { | ||||
|     QuorumAIOCB *acb = sacb->parent; | ||||
|     QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; | ||||
|     quorum_report_bad(type, acb->sector_num, acb->nb_sectors, | ||||
|                       sacb->aiocb->bs->node_name, ret); | ||||
| } | ||||
| 
 | ||||
| static void quorum_fifo_aio_cb(void *opaque, int ret) | ||||
| { | ||||
|     QuorumChildRequest *sacb = opaque; | ||||
|     QuorumAIOCB *acb = sacb->parent; | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
| 
 | ||||
|     assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); | ||||
| 
 | ||||
|     if (ret < 0) { | ||||
|         quorum_report_bad_acb(sacb, ret); | ||||
| 
 | ||||
|         /* We try to read next child in FIFO order if we fail to read */ | ||||
|         if (acb->children_read < s->num_children) { | ||||
|             read_fifo_child(acb); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     acb->vote_ret = ret; | ||||
| 
 | ||||
|     /* FIXME: rewrite failed children if acb->children_read > 1? */ | ||||
|     quorum_aio_finalize(acb); | ||||
| } | ||||
| 
 | ||||
| static void quorum_aio_cb(void *opaque, int ret) | ||||
| { | ||||
|     QuorumChildRequest *sacb = opaque; | ||||
|     QuorumAIOCB *acb = sacb->parent; | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     bool rewrite = false; | ||||
|     int i; | ||||
| 
 | ||||
|     sacb->ret = ret; | ||||
|     if (ret == 0) { | ||||
|         acb->success_count++; | ||||
|     } else { | ||||
|         quorum_report_bad_acb(sacb, ret); | ||||
|     } | ||||
|     acb->count++; | ||||
|     assert(acb->count <= s->num_children); | ||||
|     assert(acb->success_count <= s->num_children); | ||||
|     if (acb->count < s->num_children) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     /* Do the vote on read */ | ||||
|     if (acb->is_read) { | ||||
|         rewrite = quorum_vote(acb); | ||||
|         for (i = 0; i < s->num_children; i++) { | ||||
|             qemu_vfree(acb->qcrs[i].buf); | ||||
|             qemu_iovec_destroy(&acb->qcrs[i].qiov); | ||||
|         } | ||||
|     } else { | ||||
|         quorum_has_too_much_io_failed(acb); | ||||
|     } | ||||
| 
 | ||||
|     /* if no rewrite is done the code will finish right away */ | ||||
|     if (!rewrite) { | ||||
|         quorum_aio_finalize(acb); | ||||
|     } | ||||
|     quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret); | ||||
| } | ||||
| 
 | ||||
| static void quorum_report_bad_versions(BDRVQuorumState *s, | ||||
| @ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, | ||||
|             continue; | ||||
|         } | ||||
|         QLIST_FOREACH(item, &version->items, next) { | ||||
|             quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num, | ||||
|                               acb->nb_sectors, | ||||
|             quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes, | ||||
|                               s->children[item->index]->bs->node_name, 0); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, | ||||
| static void quorum_rewrite_entry(void *opaque) | ||||
| { | ||||
|     QuorumCo *co = opaque; | ||||
|     QuorumAIOCB *acb = co->acb; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
| 
 | ||||
|     /* Ignore any errors, it's just a correction attempt for already
 | ||||
|      * corrupted data. */ | ||||
|     bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes, | ||||
|                     acb->qiov, 0); | ||||
| 
 | ||||
|     /* Wake up the caller after the last rewrite */ | ||||
|     acb->rewrite_count--; | ||||
|     if (!acb->rewrite_count) { | ||||
|         qemu_coroutine_enter_if_inactive(acb->co); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb, | ||||
|                                         QuorumVoteValue *value) | ||||
| { | ||||
|     QuorumVoteVersion *version; | ||||
| @ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /* quorum_rewrite_aio_cb will count down this to zero */ | ||||
|     /* quorum_rewrite_entry will count down this to zero */ | ||||
|     acb->rewrite_count = count; | ||||
| 
 | ||||
|     /* now fire the correcting rewrites */ | ||||
| @ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, | ||||
|             continue; | ||||
|         } | ||||
|         QLIST_FOREACH(item, &version->items, next) { | ||||
|             bdrv_aio_writev(s->children[item->index], acb->sector_num, | ||||
|                             acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb, | ||||
|                             acb); | ||||
|             Coroutine *co; | ||||
|             QuorumCo data = { | ||||
|                 .acb = acb, | ||||
|                 .idx = item->index, | ||||
|             }; | ||||
| 
 | ||||
|             co = qemu_coroutine_create(quorum_rewrite_entry, &data); | ||||
|             qemu_coroutine_enter(co); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb, | ||||
|     va_list ap; | ||||
| 
 | ||||
|     va_start(ap, fmt); | ||||
|     fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ", | ||||
|             acb->sector_num, acb->nb_sectors); | ||||
|     fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ", | ||||
|             acb->offset, acb->bytes); | ||||
|     vfprintf(stderr, fmt, ap); | ||||
|     fprintf(stderr, "\n"); | ||||
|     va_end(ap); | ||||
| @ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb, | ||||
|                            QEMUIOVector *a, | ||||
|                            QEMUIOVector *b) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     ssize_t offset; | ||||
| 
 | ||||
|     /* This driver will replace blkverify in this particular case */ | ||||
|     if (s->is_blkverify) { | ||||
|         offset = qemu_iovec_compare(a, b); | ||||
|         if (offset != -1) { | ||||
|             quorum_err(acb, "contents mismatch in sector %" PRId64, | ||||
|                        acb->sector_num + | ||||
|                        (uint64_t)(offset / BDRV_SECTOR_SIZE)); | ||||
|             quorum_err(acb, "contents mismatch at offset %" PRIu64, | ||||
|                        acb->offset + offset); | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
| @ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb, | ||||
| /* Do a vote to get the error code */ | ||||
| static int quorum_vote_error(QuorumAIOCB *acb) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     QuorumVoteVersion *winner = NULL; | ||||
|     QuorumVotes error_votes; | ||||
|     QuorumVoteValue result_value; | ||||
| @ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb) | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static bool quorum_vote(QuorumAIOCB *acb) | ||||
| static void quorum_vote(QuorumAIOCB *acb) | ||||
| { | ||||
|     bool quorum = true; | ||||
|     bool rewrite = false; | ||||
|     int i, j, ret; | ||||
|     QuorumVoteValue hash; | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     QuorumVoteVersion *winner; | ||||
| 
 | ||||
|     if (quorum_has_too_much_io_failed(acb)) { | ||||
|         return false; | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     /* get the index of the first successful read */ | ||||
| @ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb) | ||||
|     /* Every successful read agrees */ | ||||
|     if (quorum) { | ||||
|         quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); | ||||
|         return false; | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     /* compute hashes for each successful read, also store indexes */ | ||||
| @ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb) | ||||
| 
 | ||||
|     /* corruption correction is enabled */ | ||||
|     if (s->rewrite_corrupted) { | ||||
|         rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value); | ||||
|         quorum_rewrite_bad_versions(acb, &winner->value); | ||||
|     } | ||||
| 
 | ||||
| free_exit: | ||||
|     /* free lists */ | ||||
|     quorum_free_vote_list(&acb->votes); | ||||
|     return rewrite; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) | ||||
| static void read_quorum_children_entry(void *opaque) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     int i; | ||||
|     QuorumCo *co = opaque; | ||||
|     QuorumAIOCB *acb = co->acb; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     int i = co->idx; | ||||
|     QuorumChildRequest *sacb = &acb->qcrs[i]; | ||||
| 
 | ||||
|     sacb->bs = s->children[i]->bs; | ||||
|     sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes, | ||||
|                                &acb->qcrs[i].qiov, 0); | ||||
| 
 | ||||
|     if (sacb->ret == 0) { | ||||
|         acb->success_count++; | ||||
|     } else { | ||||
|         quorum_report_bad_acb(sacb, sacb->ret); | ||||
|     } | ||||
| 
 | ||||
|     acb->count++; | ||||
|     assert(acb->count <= s->num_children); | ||||
|     assert(acb->success_count <= s->num_children); | ||||
| 
 | ||||
|     /* Wake up the caller after the last read */ | ||||
|     if (acb->count == s->num_children) { | ||||
|         qemu_coroutine_enter_if_inactive(acb->co); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int read_quorum_children(QuorumAIOCB *acb) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     int i, ret; | ||||
| 
 | ||||
|     acb->children_read = s->num_children; | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
| @ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) | ||||
|     } | ||||
| 
 | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
|         acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num, | ||||
|                                             &acb->qcrs[i].qiov, acb->nb_sectors, | ||||
|                                             quorum_aio_cb, &acb->qcrs[i]); | ||||
|         Coroutine *co; | ||||
|         QuorumCo data = { | ||||
|             .acb = acb, | ||||
|             .idx = i, | ||||
|         }; | ||||
| 
 | ||||
|         co = qemu_coroutine_create(read_quorum_children_entry, &data); | ||||
|         qemu_coroutine_enter(co); | ||||
|     } | ||||
| 
 | ||||
|     return &acb->common; | ||||
|     while (acb->count < s->num_children) { | ||||
|         qemu_coroutine_yield(); | ||||
|     } | ||||
| 
 | ||||
|     /* Do the vote on read */ | ||||
|     quorum_vote(acb); | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
|         qemu_vfree(acb->qcrs[i].buf); | ||||
|         qemu_iovec_destroy(&acb->qcrs[i].qiov); | ||||
|     } | ||||
| 
 | ||||
|     while (acb->rewrite_count) { | ||||
|         qemu_coroutine_yield(); | ||||
|     } | ||||
| 
 | ||||
|     ret = acb->vote_ret; | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) | ||||
| static int read_fifo_child(QuorumAIOCB *acb) | ||||
| { | ||||
|     BDRVQuorumState *s = acb->common.bs->opaque; | ||||
|     int n = acb->children_read++; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     int n, ret; | ||||
| 
 | ||||
|     acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, | ||||
|                                         acb->qiov, acb->nb_sectors, | ||||
|                                         quorum_fifo_aio_cb, &acb->qcrs[n]); | ||||
|     /* We try to read the next child in FIFO order if we failed to read */ | ||||
|     do { | ||||
|         n = acb->children_read++; | ||||
|         acb->qcrs[n].bs = s->children[n]->bs; | ||||
|         ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, | ||||
|                              acb->qiov, 0); | ||||
|         if (ret < 0) { | ||||
|             quorum_report_bad_acb(&acb->qcrs[n], ret); | ||||
|         } | ||||
|     } while (ret < 0 && acb->children_read < s->num_children); | ||||
| 
 | ||||
|     return &acb->common; | ||||
|     /* FIXME: rewrite failed children if acb->children_read > 1? */ | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, | ||||
|                                     int64_t sector_num, | ||||
|                                     QEMUIOVector *qiov, | ||||
|                                     int nb_sectors, | ||||
|                                     BlockCompletionFunc *cb, | ||||
|                                     void *opaque) | ||||
| static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, | ||||
|                             uint64_t bytes, QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     BDRVQuorumState *s = bs->opaque; | ||||
|     QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, | ||||
|                                       nb_sectors, cb, opaque); | ||||
|     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); | ||||
|     int ret; | ||||
| 
 | ||||
|     acb->is_read = true; | ||||
|     acb->children_read = 0; | ||||
| 
 | ||||
|     if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { | ||||
|         return read_quorum_children(acb); | ||||
|         ret = read_quorum_children(acb); | ||||
|     } else { | ||||
|         ret = read_fifo_child(acb); | ||||
|     } | ||||
|     quorum_aio_finalize(acb); | ||||
| 
 | ||||
|     return read_fifo_child(acb); | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs, | ||||
|                                      int64_t sector_num, | ||||
|                                      QEMUIOVector *qiov, | ||||
|                                      int nb_sectors, | ||||
|                                      BlockCompletionFunc *cb, | ||||
|                                      void *opaque) | ||||
| static void write_quorum_entry(void *opaque) | ||||
| { | ||||
|     QuorumCo *co = opaque; | ||||
|     QuorumAIOCB *acb = co->acb; | ||||
|     BDRVQuorumState *s = acb->bs->opaque; | ||||
|     int i = co->idx; | ||||
|     QuorumChildRequest *sacb = &acb->qcrs[i]; | ||||
| 
 | ||||
|     sacb->bs = s->children[i]->bs; | ||||
|     sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, | ||||
|                                 acb->qiov, 0); | ||||
|     if (sacb->ret == 0) { | ||||
|         acb->success_count++; | ||||
|     } else { | ||||
|         quorum_report_bad_acb(sacb, sacb->ret); | ||||
|     } | ||||
|     acb->count++; | ||||
|     assert(acb->count <= s->num_children); | ||||
|     assert(acb->success_count <= s->num_children); | ||||
| 
 | ||||
|     /* Wake up the caller after the last write */ | ||||
|     if (acb->count == s->num_children) { | ||||
|         qemu_coroutine_enter_if_inactive(acb->co); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||||
|                              uint64_t bytes, QEMUIOVector *qiov, int flags) | ||||
| { | ||||
|     BDRVQuorumState *s = bs->opaque; | ||||
|     QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, | ||||
|                                       cb, opaque); | ||||
|     int i; | ||||
|     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); | ||||
|     int i, ret; | ||||
| 
 | ||||
|     for (i = 0; i < s->num_children; i++) { | ||||
|         acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num, | ||||
|                                              qiov, nb_sectors, &quorum_aio_cb, | ||||
|                                              &acb->qcrs[i]); | ||||
|         Coroutine *co; | ||||
|         QuorumCo data = { | ||||
|             .acb = acb, | ||||
|             .idx = i, | ||||
|         }; | ||||
| 
 | ||||
|         co = qemu_coroutine_create(write_quorum_entry, &data); | ||||
|         qemu_coroutine_enter(co); | ||||
|     } | ||||
| 
 | ||||
|     return &acb->common; | ||||
|     while (acb->count < s->num_children) { | ||||
|         qemu_coroutine_yield(); | ||||
|     } | ||||
| 
 | ||||
|     quorum_has_too_much_io_failed(acb); | ||||
| 
 | ||||
|     ret = acb->vote_ret; | ||||
|     quorum_aio_finalize(acb); | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static int64_t quorum_getlength(BlockDriverState *bs) | ||||
| @ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) | ||||
|         result = bdrv_co_flush(s->children[i]->bs); | ||||
|         if (result) { | ||||
|             quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, | ||||
|                               bdrv_nb_sectors(s->children[i]->bs), | ||||
|                               bdrv_getlength(s->children[i]->bs), | ||||
|                               s->children[i]->bs->node_name, result); | ||||
|             result_value.l = result; | ||||
|             quorum_count_vote(&error_votes, &result_value, i); | ||||
| @ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = { | ||||
| 
 | ||||
|     .bdrv_getlength                     = quorum_getlength, | ||||
| 
 | ||||
|     .bdrv_aio_readv                     = quorum_aio_readv, | ||||
|     .bdrv_aio_writev                    = quorum_aio_writev, | ||||
|     .bdrv_co_preadv                     = quorum_co_preadv, | ||||
|     .bdrv_co_pwritev                    = quorum_co_pwritev, | ||||
| 
 | ||||
|     .bdrv_add_child                     = quorum_add_child, | ||||
|     .bdrv_del_child                     = quorum_del_child, | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| /* BlockDriver implementation for "raw"
 | ||||
| /* BlockDriver implementation for "raw" format driver
 | ||||
|  * | ||||
|  * Copyright (C) 2010-2016 Red Hat, Inc. | ||||
|  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com> | ||||
| @ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p" | ||||
| qmp_block_job_complete(void *job) "job %p" | ||||
| qmp_block_stream(void *bs, void *job) "bs %p job %p" | ||||
| 
 | ||||
| # block/raw-win32.c | ||||
| # block/raw-posix.c | ||||
| # block/file-win32.c | ||||
| # block/file-posix.c | ||||
| paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d" | ||||
| paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @ -2750,7 +2750,7 @@ if compile_prog "" "" ; then | ||||
| fi | ||||
| 
 | ||||
| ########################################## | ||||
| # xfsctl() probe, used for raw-posix | ||||
| # xfsctl() probe, used for file-posix.c | ||||
| if test "$xfs" != "no" ; then | ||||
|   cat > $TMPC << EOF | ||||
| #include <stddef.h>  /* NULL */ | ||||
|  | ||||
| @ -184,7 +184,7 @@ struct BlockDriver { | ||||
| 
 | ||||
|     /*
 | ||||
|      * Flushes all data that was already written to the OS all the way down to | ||||
|      * the disk (for example raw-posix calls fsync()). | ||||
|      * the disk (for example file-posix.c calls fsync()). | ||||
|      */ | ||||
|     int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); | ||||
| 
 | ||||
|  | ||||
| @ -70,6 +70,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque); | ||||
|  */ | ||||
| void qemu_coroutine_enter(Coroutine *coroutine); | ||||
| 
 | ||||
| /**
 | ||||
|  * Transfer control to a coroutine if it's not active (i.e. part of the call | ||||
|  * stack of the running coroutine). Otherwise, do nothing. | ||||
|  */ | ||||
| void qemu_coroutine_enter_if_inactive(Coroutine *co); | ||||
| 
 | ||||
| /**
 | ||||
|  * Transfer control back to a coroutine's caller | ||||
|  * | ||||
|  | ||||
							
								
								
									
										17
									
								
								qemu-img.c
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								qemu-img.c
									
									
									
									
									
								
							| @ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret) | ||||
|     } | ||||
| 
 | ||||
|     while (b->n > b->in_flight && b->in_flight < b->nrreq) { | ||||
|         int64_t offset = b->offset; | ||||
|         /* blk_aio_* might look for completed I/Os and kick bench_cb
 | ||||
|          * again, so make sure this operation is counted by in_flight | ||||
|          * and b->offset is ready for the next submission. | ||||
|          */ | ||||
|         b->in_flight++; | ||||
|         b->offset += b->step; | ||||
|         b->offset %= b->image_size; | ||||
|         if (b->write) { | ||||
|             acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0, | ||||
|                                   bench_cb, b); | ||||
|             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b); | ||||
|         } else { | ||||
|             acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0, | ||||
|                                  bench_cb, b); | ||||
|             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b); | ||||
|         } | ||||
|         if (!acb) { | ||||
|             error_report("Failed to issue request"); | ||||
|             exit(EXIT_FAILURE); | ||||
|         } | ||||
|         b->in_flight++; | ||||
|         b->offset += b->step; | ||||
|         b->offset %= b->image_size; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -12,7 +12,7 @@ read 512/512 bytes at offset 229376 | ||||
| 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||||
| wrote 512/512 bytes at offset 0 | ||||
| 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||||
| blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 | ||||
| blkverify: read offset=0 bytes=512 contents mismatch at offset 0 | ||||
| 
 | ||||
| === Testing blkverify through file blockref === | ||||
| 
 | ||||
| @ -26,7 +26,7 @@ read 512/512 bytes at offset 229376 | ||||
| 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||||
| wrote 512/512 bytes at offset 0 | ||||
| 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||||
| blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 | ||||
| blkverify: read offset=0 bytes=512 contents mismatch at offset 0 | ||||
| 
 | ||||
| === Testing blkdebug through filename === | ||||
| 
 | ||||
| @ -56,7 +56,7 @@ QMP_VERSION | ||||
| {"return": {}} | ||||
| {"return": {}} | ||||
| {"return": {}} | ||||
| blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 | ||||
| blkverify: read offset=0 bytes=512 contents mismatch at offset 0 | ||||
| 
 | ||||
| 
 | ||||
| === Testing blkverify on existing raw block device === | ||||
| @ -66,7 +66,7 @@ QMP_VERSION | ||||
| {"return": {}} | ||||
| {"return": {}} | ||||
| {"return": {}} | ||||
| blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 | ||||
| blkverify: read offset=0 bytes=512 contents mismatch at offset 0 | ||||
| 
 | ||||
| 
 | ||||
| === Testing blkdebug's set-state through QMP === | ||||
|  | ||||
| @ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void qemu_coroutine_enter_if_inactive(Coroutine *co) | ||||
| { | ||||
|     if (!qemu_coroutine_entered(co)) { | ||||
|         qemu_coroutine_enter(co); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void coroutine_fn qemu_coroutine_yield(void) | ||||
| { | ||||
|     Coroutine *self = qemu_coroutine_self(); | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Peter Maydell
						Peter Maydell