Merge branch 'guilt/xlog-write-rework' into xfs-5.19-for-next
commit 463260d767
@@ -69,7 +69,6 @@ static inline uint xlog_get_cycle(char *ptr)

/* Log Clients */
#define XFS_TRANSACTION 0x69
#define XFS_VOLUME 0x2
#define XFS_LOG 0xaa

#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */

fs/xfs/xfs_log.c
@@ -49,7 +49,6 @@ xlog_state_get_iclog_space(
int len,
struct xlog_in_core **iclog,
struct xlog_ticket *ticket,
int *continued_write,
int *logoffsetp);
STATIC void
xlog_grant_push_ail(
@@ -61,10 +60,6 @@ xlog_sync(
struct xlog_in_core *iclog);
#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
struct xlog *log,
void *ptr);
STATIC void
xlog_verify_grant_tail(
struct xlog *log);
STATIC void
@@ -77,7 +72,6 @@ xlog_verify_tail_lsn(
struct xlog *log,
struct xlog_in_core *iclog);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b)
@@ -90,6 +84,62 @@ xlog_iclogs_empty(
static int
xfs_log_cover(struct xfs_mount *);

/*
* We need to make sure the buffer pointer returned is naturally aligned for the
* biggest basic data type we put into it. We have already accounted for this
* padding when sizing the buffer.
*
* However, this padding does not get written into the log, and hence we have to
* track the space used by the log vectors separately to prevent log space hangs
* due to inaccurate accounting (i.e. a leak) of the used log space through the
* CIL context ticket.
*
* We also add space for the xlog_op_header that describes this region in the
* log. This prepends the data region we return to the caller to copy their data
* into, so do all the static initialisation of the ophdr now. Because the ophdr
* is not 8 byte aligned, we have to be careful to ensure that we align the
* start of the buffer such that the region we return to the caller is 8 byte
* aligned and packed against the tail of the ophdr.
*/
void *
xlog_prepare_iovec(
struct xfs_log_vec *lv,
struct xfs_log_iovec **vecp,
uint type)
{
struct xfs_log_iovec *vec = *vecp;
struct xlog_op_header *oph;
uint32_t len;
void *buf;

if (vec) {
ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
vec++;
} else {
vec = &lv->lv_iovecp[0];
}

len = lv->lv_buf_len + sizeof(struct xlog_op_header);
if (!IS_ALIGNED(len, sizeof(uint64_t))) {
lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
sizeof(struct xlog_op_header);
}

vec->i_type = type;
vec->i_addr = lv->lv_buf + lv->lv_buf_len;

oph = vec->i_addr;
oph->oh_clientid = XFS_TRANSACTION;
oph->oh_res2 = 0;
oph->oh_flags = 0;

buf = vec->i_addr + sizeof(struct xlog_op_header);
ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));

*vecp = vec;
return buf;
}

static void
xlog_grant_sub_space(
struct xlog *log,
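A minimal standalone model of the alignment rule described above (userspace C, not the kernel code; the 12-byte opheader size is an assumption): it rounds lv_buf_len so that opheader plus payload start on an 8-byte boundary, and checks that the data pointer handed back to the caller is naturally aligned.

#include <assert.h>
#include <stdio.h>

#define OPHDR_SIZE 12u	/* assumed sizeof(struct xlog_op_header) */
#define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned lv_buf_len = 0;	/* space already used in lv->lv_buf */
	unsigned payloads[] = { 1, 17, 48, 52 };

	for (unsigned i = 0; i < sizeof(payloads) / sizeof(payloads[0]); i++) {
		/*
		 * xlog_prepare_iovec(): place the opheader at the buffer tail,
		 * rounding so that opheader + payload start 8-byte aligned.
		 */
		unsigned len = lv_buf_len + OPHDR_SIZE;
		if (len % 8)
			lv_buf_len = ROUND_UP(len, 8) - OPHDR_SIZE;

		unsigned region = lv_buf_len;		/* vec->i_addr offset */
		unsigned data = region + OPHDR_SIZE;	/* pointer returned to the caller */
		assert(data % 8 == 0);			/* payload is naturally aligned */

		/* xlog_finish_iovec(): the region consumes opheader + payload */
		lv_buf_len += OPHDR_SIZE + payloads[i];
		printf("region at %3u, data at %3u, payload %2u\n",
		       region, data, payloads[i]);
	}
	return 0;
}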
@@ -322,30 +372,6 @@ xlog_grant_head_check(
return error;
}

static void
xlog_tic_reset_res(xlog_ticket_t *tic)
{
tic->t_res_num = 0;
tic->t_res_arr_sum = 0;
tic->t_res_num_ophdrs = 0;
}

static void
xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
{
if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
/* add to overflow and start again */
tic->t_res_o_flow += tic->t_res_arr_sum;
tic->t_res_num = 0;
tic->t_res_arr_sum = 0;
}

tic->t_res_arr[tic->t_res_num].r_len = len;
tic->t_res_arr[tic->t_res_num].r_type = type;
tic->t_res_arr_sum += len;
tic->t_res_num++;
}

bool
xfs_log_writable(
struct xfs_mount *mp)
@@ -395,8 +421,6 @@ xfs_log_regrant(
xlog_grant_push_ail(log, tic->t_unit_res);

tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);

if (tic->t_cnt > 0)
return 0;

@@ -437,7 +461,6 @@ xfs_log_reserve(
int unit_bytes,
int cnt,
struct xlog_ticket **ticp,
uint8_t client,
bool permanent)
{
struct xlog *log = mp->m_log;
@@ -445,15 +468,13 @@ xfs_log_reserve(
int need_bytes;
int error = 0;

ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);

if (xlog_is_shutdown(log))
return -EIO;

XFS_STATS_INC(mp, xs_try_logspace);

ASSERT(*ticp == NULL);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent);
*ticp = tic;

xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -901,12 +922,22 @@ xlog_write_unmount_record(
struct xlog *log,
struct xlog_ticket *ticket)
{
struct xfs_unmount_log_format ulf = {
struct {
struct xlog_op_header ophdr;
struct xfs_unmount_log_format ulf;
} unmount_rec = {
.ophdr = {
.oh_clientid = XFS_LOG,
.oh_tid = cpu_to_be32(ticket->t_tid),
.oh_flags = XLOG_UNMOUNT_TRANS,
},
.ulf = {
.magic = XLOG_UNMOUNT_TYPE,
},
};
struct xfs_log_iovec reg = {
.i_addr = &ulf,
.i_len = sizeof(ulf),
.i_addr = &unmount_rec,
.i_len = sizeof(unmount_rec),
.i_type = XLOG_REG_TYPE_UNMOUNT,
};
struct xfs_log_vec vec = {
@@ -914,10 +945,14 @@ xlog_write_unmount_record(
.lv_iovecp = &reg,
};

/* account for space used by record data */
ticket->t_curr_res -= sizeof(ulf);
BUILD_BUG_ON((sizeof(struct xlog_op_header) +
sizeof(struct xfs_unmount_log_format)) !=
sizeof(unmount_rec));

return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS);
/* account for space used by record data */
ticket->t_curr_res -= sizeof(unmount_rec);

return xlog_write(log, NULL, &vec, ticket, reg.i_len);
}

/*
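The BUILD_BUG_ON above encodes the assumption that the opheader and the unmount format pack back to back with no padding, so the single region written is exactly the two structures. A standalone model of that compile-time check (the struct layouts here are illustrative stand-ins, not the kernel definitions):

#include <stdint.h>

/* illustrative stand-ins for the log structures; field sizes are assumptions */
struct xlog_op_header {
	uint32_t oh_tid;
	uint32_t oh_len;
	uint8_t  oh_clientid;
	uint8_t  oh_flags;
	uint16_t oh_res2;
};				/* 12 bytes */

struct xfs_unmount_log_format {
	uint16_t magic;
	uint16_t pad1;
	uint32_t pad2;
};				/* 8 bytes */

struct unmount_rec {
	struct xlog_op_header ophdr;
	struct xfs_unmount_log_format ulf;
};

/* compile-time equivalent of the BUILD_BUG_ON in the diff */
_Static_assert(sizeof(struct xlog_op_header) +
	       sizeof(struct xfs_unmount_log_format) ==
	       sizeof(struct unmount_rec),
	       "opheader and unmount payload must be contiguous");

int main(void) { return 0; }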
@@ -933,7 +968,7 @@ xlog_unmount_write(
struct xlog_ticket *tic = NULL;
int error;

error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
error = xfs_log_reserve(mp, 600, 1, &tic, 0);
if (error)
goto out_err;

@@ -1584,9 +1619,6 @@ xlog_alloc_log(
GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
log->l_iclog_bak[i] = &iclog->ic_header;
#endif
head = &iclog->ic_header;
memset(head, 0, sizeof(xlog_rec_header_t));
head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
@@ -1602,7 +1634,7 @@ xlog_alloc_log(
iclog->ic_log = log;
atomic_set(&iclog->ic_refcnt, 0);
INIT_LIST_HEAD(&iclog->ic_callbacks);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
iclog->ic_datap = (void *)iclog->ic_data + log->l_iclog_hsize;

init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
@@ -2111,63 +2143,11 @@ xlog_print_tic_res(
struct xfs_mount *mp,
struct xlog_ticket *ticket)
{
uint i;
uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);

/* match with XLOG_REG_TYPE_* in xfs_log.h */
#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
static char *res_type_str[] = {
REG_TYPE_STR(BFORMAT, "bformat"),
REG_TYPE_STR(BCHUNK, "bchunk"),
REG_TYPE_STR(EFI_FORMAT, "efi_format"),
REG_TYPE_STR(EFD_FORMAT, "efd_format"),
REG_TYPE_STR(IFORMAT, "iformat"),
REG_TYPE_STR(ICORE, "icore"),
REG_TYPE_STR(IEXT, "iext"),
REG_TYPE_STR(IBROOT, "ibroot"),
REG_TYPE_STR(ILOCAL, "ilocal"),
REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
REG_TYPE_STR(QFORMAT, "qformat"),
REG_TYPE_STR(DQUOT, "dquot"),
REG_TYPE_STR(QUOTAOFF, "quotaoff"),
REG_TYPE_STR(LRHEADER, "LR header"),
REG_TYPE_STR(UNMOUNT, "unmount"),
REG_TYPE_STR(COMMIT, "commit"),
REG_TYPE_STR(TRANSHDR, "trans header"),
REG_TYPE_STR(ICREATE, "inode create"),
REG_TYPE_STR(RUI_FORMAT, "rui_format"),
REG_TYPE_STR(RUD_FORMAT, "rud_format"),
REG_TYPE_STR(CUI_FORMAT, "cui_format"),
REG_TYPE_STR(CUD_FORMAT, "cud_format"),
REG_TYPE_STR(BUI_FORMAT, "bui_format"),
REG_TYPE_STR(BUD_FORMAT, "bud_format"),
};
BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
#undef REG_TYPE_STR

xfs_warn(mp, "ticket reservation summary:");
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes",
ticket->t_curr_res);
xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)",
ticket->t_res_arr_sum, ticket->t_res_o_flow);
xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)",
ticket->t_res_num_ophdrs, ophdr_spc);
xfs_warn(mp, " ophdr + reg = %u bytes",
ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc);
xfs_warn(mp, " num regions = %u",
ticket->t_res_num);

for (i = 0; i < ticket->t_res_num; i++) {
uint r_type = ticket->t_res_arr[i].r_type;
xfs_warn(mp, "region[%u]: %s - %u bytes", i,
((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
"bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
xfs_warn(mp, " unit res = %d bytes", ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes", ticket->t_curr_res);
xfs_warn(mp, " original count = %d", ticket->t_ocnt);
xfs_warn(mp, " remaining count = %d", ticket->t_cnt);
}

/*
@@ -2220,187 +2200,226 @@ xlog_print_trans(
}
}

static inline void
xlog_write_iovec(
struct xlog_in_core *iclog,
uint32_t *log_offset,
void *data,
uint32_t write_len,
int *bytes_left,
uint32_t *record_cnt,
uint32_t *data_cnt)
{
ASSERT(*log_offset < iclog->ic_log->l_iclog_size);
ASSERT(*log_offset % sizeof(int32_t) == 0);
ASSERT(write_len % sizeof(int32_t) == 0);

memcpy(iclog->ic_datap + *log_offset, data, write_len);
*log_offset += write_len;
*bytes_left -= write_len;
(*record_cnt)++;
*data_cnt += write_len;
}

/*
* Calculate the potential space needed by the log vector. We may need a start
* record, and each region gets its own struct xlog_op_header and may need to be
* double word aligned.
* Write log vectors into a single iclog which is guaranteed by the caller
* to have enough space to write the entire log vector into.
*/
static int
xlog_write_calc_vec_length(
struct xlog_ticket *ticket,
struct xfs_log_vec *log_vector,
uint optype)
{
struct xfs_log_vec *lv;
int headers = 0;
int len = 0;
int i;

if (optype & XLOG_START_TRANS)
headers++;

for (lv = log_vector; lv; lv = lv->lv_next) {
/* we don't write ordered log vectors */
if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
continue;

headers += lv->lv_niovecs;

for (i = 0; i < lv->lv_niovecs; i++) {
struct xfs_log_iovec *vecp = &lv->lv_iovecp[i];

len += vecp->i_len;
xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
}
}

ticket->t_res_num_ophdrs += headers;
len += headers * sizeof(struct xlog_op_header);

return len;
}

static void
xlog_write_start_rec(
struct xlog_op_header *ophdr,
struct xlog_ticket *ticket)
{
ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
ophdr->oh_clientid = ticket->t_clientid;
ophdr->oh_len = 0;
ophdr->oh_flags = XLOG_START_TRANS;
ophdr->oh_res2 = 0;
}

static xlog_op_header_t *
xlog_write_setup_ophdr(
struct xlog *log,
struct xlog_op_header *ophdr,
xlog_write_full(
struct xfs_log_vec *lv,
struct xlog_ticket *ticket,
uint flags)
struct xlog_in_core *iclog,
uint32_t *log_offset,
uint32_t *len,
uint32_t *record_cnt,
uint32_t *data_cnt)
{
ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
ophdr->oh_clientid = ticket->t_clientid;
ophdr->oh_res2 = 0;
int index;

/* are we copying a commit or unmount record? */
ophdr->oh_flags = flags;
ASSERT(*log_offset + *len <= iclog->ic_size ||
iclog->ic_state == XLOG_STATE_WANT_SYNC);

/*
* We've seen logs corrupted with bad transaction client ids. This
* makes sure that XFS doesn't generate them on. Turn this into an EIO
* and shut down the filesystem.
* Ordered log vectors have no regions to write so this
* loop will naturally skip them.
*/
switch (ophdr->oh_clientid) {
case XFS_TRANSACTION:
case XFS_VOLUME:
case XFS_LOG:
break;
default:
xfs_warn(log->l_mp,
"Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
ophdr->oh_clientid, ticket);
return NULL;
}
for (index = 0; index < lv->lv_niovecs; index++) {
struct xfs_log_iovec *reg = &lv->lv_iovecp[index];
struct xlog_op_header *ophdr = reg->i_addr;

return ophdr;
ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
xlog_write_iovec(iclog, log_offset, reg->i_addr,
reg->i_len, len, record_cnt, data_cnt);
}
}

/*
* Set up the parameters of the region copy into the log. This has
* to handle region write split across multiple log buffers - this
* state is kept external to this function so that this code can
* be written in an obvious, self documenting manner.
*/
static int
xlog_write_setup_copy(
xlog_write_get_more_iclog_space(
struct xlog_ticket *ticket,
struct xlog_op_header *ophdr,
int space_available,
int space_required,
int *copy_off,
int *copy_len,
int *last_was_partial_copy,
int *bytes_consumed)
{
int still_to_copy;

still_to_copy = space_required - *bytes_consumed;
*copy_off = *bytes_consumed;

if (still_to_copy <= space_available) {
/* write of region completes here */
*copy_len = still_to_copy;
ophdr->oh_len = cpu_to_be32(*copy_len);
if (*last_was_partial_copy)
ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
*last_was_partial_copy = 0;
*bytes_consumed = 0;
return 0;
}

/* partial write of region, needs extra log op header reservation */
*copy_len = space_available;
ophdr->oh_len = cpu_to_be32(*copy_len);
ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
if (*last_was_partial_copy)
ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
*bytes_consumed += *copy_len;
(*last_was_partial_copy)++;

/* account for new log op header */
ticket->t_curr_res -= sizeof(struct xlog_op_header);
ticket->t_res_num_ophdrs++;

return sizeof(struct xlog_op_header);
}

static int
xlog_write_copy_finish(
struct xlog *log,
struct xlog_in_core *iclog,
uint flags,
int *record_cnt,
int *data_cnt,
int *partial_copy,
int *partial_copy_len,
int log_offset)
struct xlog_in_core **iclogp,
uint32_t *log_offset,
uint32_t len,
uint32_t *record_cnt,
uint32_t *data_cnt)
{
struct xlog_in_core *iclog = *iclogp;
struct xlog *log = iclog->ic_log;
int error;

if (*partial_copy) {
/*
* This iclog has already been marked WANT_SYNC by
* xlog_state_get_iclog_space.
*/
spin_lock(&log->l_icloglock);
ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
goto release_iclog;
}

*partial_copy = 0;
*partial_copy_len = 0;

if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t))
return 0;

/* no more space in this iclog - push it. */
spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;

if (iclog->ic_state == XLOG_STATE_ACTIVE)
xlog_state_switch_iclogs(log, iclog, 0);
else
ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
xlog_is_shutdown(log));
release_iclog:
error = xlog_state_release_iclog(log, iclog);
spin_unlock(&log->l_icloglock);
if (error)
return error;

error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
log_offset);
if (error)
return error;
*record_cnt = 0;
*data_cnt = 0;
*iclogp = iclog;
return 0;
}

/*
* Write log vectors into a single iclog which is smaller than the current chain
* length. We write until we cannot fit a full record into the remaining space
* and then stop. We return the log vector that is to be written that cannot
* wholly fit in the iclog.
*/
static int
xlog_write_partial(
struct xfs_log_vec *lv,
struct xlog_ticket *ticket,
struct xlog_in_core **iclogp,
uint32_t *log_offset,
uint32_t *len,
uint32_t *record_cnt,
uint32_t *data_cnt)
{
struct xlog_in_core *iclog = *iclogp;
struct xlog_op_header *ophdr;
int index = 0;
uint32_t rlen;
int error;

/* walk the logvec, copying until we run out of space in the iclog */
for (index = 0; index < lv->lv_niovecs; index++) {
struct xfs_log_iovec *reg = &lv->lv_iovecp[index];
uint32_t reg_offset = 0;

/*
* The first region of a continuation must have a non-zero
* length otherwise log recovery will just skip over it and
* start recovering from the next opheader it finds. Because we
* mark the next opheader as a continuation, recovery will then
* incorrectly add the continuation to the previous region and
* that breaks stuff.
*
* Hence if there isn't space for region data after the
* opheader, then we need to start afresh with a new iclog.
*/
if (iclog->ic_size - *log_offset <=
sizeof(struct xlog_op_header)) {
error = xlog_write_get_more_iclog_space(ticket,
&iclog, log_offset, *len, record_cnt,
data_cnt);
if (error)
return error;
}

ophdr = reg->i_addr;
rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);

ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header));
if (rlen != reg->i_len)
ophdr->oh_flags |= XLOG_CONTINUE_TRANS;

xlog_write_iovec(iclog, log_offset, reg->i_addr,
rlen, len, record_cnt, data_cnt);

/* If we wrote the whole region, move to the next. */
if (rlen == reg->i_len)
continue;

/*
* We now have a partially written iovec, but it can span
* multiple iclogs so we loop here. First we release the iclog
* we currently have, then we get a new iclog and add a new
* opheader. Then we continue copying from where we were until
* we either complete the iovec or fill the iclog. If we
* complete the iovec, then we increment the index and go right
* back to the top of the outer loop. If we fill the iclog, we
* run the inner loop again.
*
* This is complicated by the tail of a region using all the
* space in an iclog and hence requiring us to release the iclog
* and get a new one before returning to the outer loop. We must
* always guarantee that we exit this inner loop with at least
* space for log transaction opheaders left in the current
* iclog, hence we cannot just terminate the loop at the end
* of the continuation. So we loop while there is no
* space left in the current iclog, and check for the end of the
* continuation after getting a new iclog.
*/
do {
/*
* Ensure we include the continuation opheader in the
* space we need in the new iclog by adding that size
* to the length we require. This continuation opheader
* needs to be accounted to the ticket as the space it
* consumes hasn't been accounted to the lv we are
* writing.
*/
error = xlog_write_get_more_iclog_space(ticket,
&iclog, log_offset,
*len + sizeof(struct xlog_op_header),
record_cnt, data_cnt);
if (error)
return error;

ophdr = iclog->ic_datap + *log_offset;
ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
ophdr->oh_clientid = XFS_TRANSACTION;
ophdr->oh_res2 = 0;
ophdr->oh_flags = XLOG_WAS_CONT_TRANS;

ticket->t_curr_res -= sizeof(struct xlog_op_header);
*log_offset += sizeof(struct xlog_op_header);
*data_cnt += sizeof(struct xlog_op_header);

/*
* If rlen fits in the iclog, then end the region
* continuation. Otherwise we're going around again.
*/
reg_offset += rlen;
rlen = reg->i_len - reg_offset;
if (rlen <= iclog->ic_size - *log_offset)
ophdr->oh_flags |= XLOG_END_TRANS;
else
ophdr->oh_flags |= XLOG_CONTINUE_TRANS;

rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset);
ophdr->oh_len = cpu_to_be32(rlen);

xlog_write_iovec(iclog, log_offset,
reg->i_addr + reg_offset,
rlen, len, record_cnt, data_cnt);

} while (ophdr->oh_flags & XLOG_CONTINUE_TRANS);
}

/*
* No more iovecs remain in this logvec so return the next log vec to
* the caller so it can go back to fast path copying.
*/
*iclogp = iclog;
return 0;
}

/*
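The flag sequence this loop emits (XLOG_CONTINUE_TRANS on a partial chunk, then XLOG_WAS_CONT_TRANS plus either XLOG_END_TRANS or XLOG_CONTINUE_TRANS on each continuation) is easier to follow in isolation. A small standalone model that splits one region across fixed-size buffers the same way (buffer and header sizes here are illustrative, not the kernel values):

#include <stdio.h>

#define CONTINUE_TRANS  0x1   /* more of this region follows in a later buffer */
#define WAS_CONT_TRANS  0x2   /* this chunk continues an earlier one */
#define END_TRANS       0x4   /* this chunk completes the region */

int main(void)
{
	unsigned buf_size = 64;     /* stand-in for iclog->ic_size */
	unsigned hdr_size = 12;     /* stand-in for sizeof(struct xlog_op_header) */
	unsigned region_len = 200;  /* region to log; its opheader travels with it */

	unsigned offset = 0;        /* bytes of the region already written */
	unsigned space = buf_size;  /* space left in the current buffer */
	unsigned flags = 0;

	/* first chunk: region written as-is, flagged if it cannot complete here */
	unsigned rlen = region_len < space ? region_len : space;
	if (rlen != region_len)
		flags |= CONTINUE_TRANS;
	printf("chunk 0: %3u bytes, flags %#x\n", rlen, flags);
	offset += rlen;

	/* continuation chunks: each new buffer gets a fresh opheader */
	for (int chunk = 1; offset < region_len; chunk++) {
		space = buf_size - hdr_size;        /* new buffer, new opheader */
		unsigned left = region_len - offset;
		flags = WAS_CONT_TRANS;
		flags |= (left <= space) ? END_TRANS : CONTINUE_TRANS;
		rlen = left < space ? left : space;
		printf("chunk %d: %3u bytes, flags %#x\n", chunk, rlen, flags);
		offset += rlen;
	}
	return 0;
}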
@@ -2449,27 +2468,16 @@ xlog_write(
struct xfs_cil_ctx *ctx,
struct xfs_log_vec *log_vector,
struct xlog_ticket *ticket,
uint optype)
uint32_t len)

{
struct xlog_in_core *iclog = NULL;
struct xfs_log_vec *lv = log_vector;
struct xfs_log_iovec *vecp = lv->lv_iovecp;
int index = 0;
int len;
int partial_copy = 0;
int partial_copy_len = 0;
int contwr = 0;
int record_cnt = 0;
int data_cnt = 0;
uint32_t record_cnt = 0;
uint32_t data_cnt = 0;
int error = 0;
int log_offset;

/*
* If this is a commit or unmount transaction, we don't need a start
* record to be written. We do, however, have to account for the
* commit or unmount header that gets written. Hence we always have
* to account for an extra xlog_op_header here.
*/
ticket->t_curr_res -= sizeof(struct xlog_op_header);
if (ticket->t_curr_res < 0) {
xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
"ctx ticket reservation ran out. Need to up reservation");
@@ -2477,144 +2485,54 @@ xlog_write(
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
}

len = xlog_write_calc_vec_length(ticket, log_vector, optype);
while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
void *ptr;
int log_offset;

error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
&contwr, &log_offset);
&log_offset);
if (error)
return error;

ASSERT(log_offset <= iclog->ic_size - 1);
ptr = iclog->ic_datap + log_offset;

/*
* If we have a context pointer, pass it the first iclog we are
* writing to so it can record state needed for iclog write
* ordering.
*/
if (ctx) {
if (ctx)
xlog_cil_set_ctx_write_state(ctx, iclog);
ctx = NULL;
}

while (lv) {
/*
* This loop writes out as many regions as can fit in the amount
* of space which was allocated by xlog_state_get_iclog_space().
* If the entire log vec does not fit in the iclog, punt it to
* the partial copy loop which can handle this case.
*/
while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
struct xfs_log_iovec *reg;
struct xlog_op_header *ophdr;
int copy_len;
int copy_off;
bool ordered = false;
bool wrote_start_rec = false;

/* ordered log vectors have no regions to write */
if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
ASSERT(lv->lv_niovecs == 0);
ordered = true;
goto next_lv;
}

reg = &vecp[index];
ASSERT(reg->i_len % sizeof(int32_t) == 0);
ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);

if (lv->lv_niovecs &&
lv->lv_bytes > iclog->ic_size - log_offset) {
error = xlog_write_partial(lv, ticket, &iclog,
&log_offset, &len, &record_cnt,
&data_cnt);
if (error) {
/*
* Before we start formatting log vectors, we need to
* write a start record. Only do this for the first
* iclog we write to.
* We have no iclog to release, so just return
* the error immediately.
*/
if (optype & XLOG_START_TRANS) {
xlog_write_start_rec(ptr, ticket);
xlog_write_adv_cnt(&ptr, &len, &log_offset,
sizeof(struct xlog_op_header));
optype &= ~XLOG_START_TRANS;
wrote_start_rec = true;
}

ophdr = xlog_write_setup_ophdr(log, ptr, ticket, optype);
if (!ophdr)
return -EIO;

xlog_write_adv_cnt(&ptr, &len, &log_offset,
sizeof(struct xlog_op_header));

len += xlog_write_setup_copy(ticket, ophdr,
iclog->ic_size-log_offset,
reg->i_len,
&copy_off, &copy_len,
&partial_copy,
&partial_copy_len);
xlog_verify_dest_ptr(log, ptr);

/*
* Copy region.
*
* Unmount records just log an opheader, so can have
* empty payloads with no data region to copy. Hence we
* only copy the payload if the vector says it has data
* to copy.
*/
ASSERT(copy_len >= 0);
if (copy_len > 0) {
memcpy(ptr, reg->i_addr + copy_off, copy_len);
xlog_write_adv_cnt(&ptr, &len, &log_offset,
copy_len);
}
copy_len += sizeof(struct xlog_op_header);
record_cnt++;
if (wrote_start_rec) {
copy_len += sizeof(struct xlog_op_header);
record_cnt++;
}
data_cnt += contwr ? copy_len : 0;

error = xlog_write_copy_finish(log, iclog, optype,
&record_cnt, &data_cnt,
&partial_copy,
&partial_copy_len,
log_offset);
if (error)
return error;

/*
* if we had a partial copy, we need to get more iclog
* space but we don't want to increment the region
* index because there is still more is this region to
* write.
*
* If we completed writing this region, and we flushed
* the iclog (indicated by resetting of the record
* count), then we also need to get more log space. If
* this was the last record, though, we are done and
* can just return.
*/
if (partial_copy)
break;

if (++index == lv->lv_niovecs) {
next_lv:
}
} else {
xlog_write_full(lv, ticket, iclog, &log_offset,
&len, &record_cnt, &data_cnt);
}
lv = lv->lv_next;
index = 0;
if (lv)
vecp = lv->lv_iovecp;
}
if (record_cnt == 0 && !ordered) {
if (!lv)
return 0;
break;
}
}
}

ASSERT(len == 0);

/*
* We've already been guaranteed that the last writes will fit inside
* the current iclog, and hence it will already have the space used by
* those writes accounted to it. Hence we do not need to update the
* iclog with the number of bytes written here.
*/
spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
xlog_state_finish_copy(log, iclog, record_cnt, 0);
error = xlog_state_release_iclog(log, iclog);
spin_unlock(&log->l_icloglock);

@@ -2971,7 +2889,6 @@ xlog_state_get_iclog_space(
int len,
struct xlog_in_core **iclogp,
struct xlog_ticket *ticket,
int *continued_write,
int *logoffsetp)
{
int log_offset;
@@ -3008,9 +2925,6 @@ restart:
*/
if (log_offset == 0) {
ticket->t_curr_res -= log->l_iclog_hsize;
xlog_tic_add_region(ticket,
log->l_iclog_hsize,
XLOG_REG_TYPE_LRHEADER);
head->h_cycle = cpu_to_be32(log->l_curr_cycle);
head->h_lsn = cpu_to_be64(
xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
@@ -3052,13 +2966,10 @@ restart:
* iclogs (to mark it taken), this particular iclog will release/sync
* to disk in xlog_write().
*/
if (len <= iclog->ic_size - iclog->ic_offset) {
*continued_write = 0;
if (len <= iclog->ic_size - iclog->ic_offset)
iclog->ic_offset += len;
} else {
*continued_write = 1;
else
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
}
*iclogp = iclog;

ASSERT(iclog->ic_offset <= iclog->ic_size);
@@ -3090,7 +3001,6 @@ xfs_log_ticket_regrant(
xlog_grant_sub_space(log, &log->l_write_head.grant,
ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);

trace_xfs_log_ticket_regrant_sub(log, ticket);

@@ -3101,7 +3011,6 @@ xfs_log_ticket_regrant(
trace_xfs_log_ticket_regrant_exit(log, ticket);

ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
}

xfs_log_ticket_put(ticket);
@@ -3591,7 +3500,6 @@ xlog_ticket_alloc(
struct xlog *log,
int unit_bytes,
int cnt,
char client,
bool permanent)
{
struct xlog_ticket *tic;
@@ -3609,39 +3517,13 @@ xlog_ticket_alloc(
tic->t_cnt = cnt;
tic->t_ocnt = cnt;
tic->t_tid = prandom_u32();
tic->t_clientid = client;
if (permanent)
tic->t_flags |= XLOG_TIC_PERM_RESERV;

xlog_tic_reset_res(tic);

return tic;
}

#if defined(DEBUG)
/*
* Make sure that the destination ptr is within the valid data region of
* one of the iclogs. This uses backup pointers stored in a different
* part of the log in case we trash the log structure.
*/
STATIC void
xlog_verify_dest_ptr(
struct xlog *log,
void *ptr)
{
int i;
int good_ptr = 0;

for (i = 0; i < log->l_iclog_bufs; i++) {
if (ptr >= log->l_iclog_bak[i] &&
ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
good_ptr++;
}

if (!good_ptr)
xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
}

/*
* Check to make sure the grant write head didn't just over lap the tail. If
* the cycles are the same, we can't be overlapping. Otherwise, make sure that
@@ -3769,7 +3651,7 @@ xlog_verify_iclog(
if (field_offset & 0x1ff) {
clientid = ophead->oh_clientid;
} else {
idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
idx = BTOBBT((void *)&ophead->oh_clientid - iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3780,11 +3662,12 @@ xlog_verify_iclog(
iclog->ic_header.h_cycle_data[idx]);
}
}
if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) {
xfs_warn(log->l_mp,
"%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
__func__, clientid, ophead,
"%s: op %d invalid clientid %d op "PTR_FMT" offset 0x%lx",
__func__, i, clientid, ophead,
(unsigned long)field_offset);
}

/* check length */
p = &ophead->oh_len;
@@ -3792,8 +3675,7 @@ xlog_verify_iclog(
if (field_offset & 0x1ff) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
idx = BTOBBT((uintptr_t)&ophead->oh_len -
(uintptr_t)iclog->ic_datap);
idx = BTOBBT((void *)&ophead->oh_len - iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -21,42 +21,19 @@ struct xfs_log_vec {

#define XFS_LOG_VEC_ORDERED (-1)

static inline void *
xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type)
{
struct xfs_log_iovec *vec = *vecp;
void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type);

if (vec) {
ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
vec++;
} else {
vec = &lv->lv_iovecp[0];
}

vec->i_type = type;
vec->i_addr = lv->lv_buf + lv->lv_buf_len;

ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));

*vecp = vec;
return vec->i_addr;
}

/*
* We need to make sure the next buffer is naturally aligned for the biggest
* basic data type we put into it. We already accounted for this padding when
* sizing the buffer.
*
* However, this padding does not get written into the log, and hence we have to
* track the space used by the log vectors separately to prevent log space hangs
* due to inaccurate accounting (i.e. a leak) of the used log space through the
* CIL context ticket.
*/
static inline void
xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
{
lv->lv_buf_len += round_up(len, sizeof(uint64_t));
struct xlog_op_header *oph = vec->i_addr;

/* opheader tracks payload length, logvec tracks region length */
oph->oh_len = cpu_to_be32(len);

len += sizeof(struct xlog_op_header);
lv->lv_buf_len += len;
lv->lv_bytes += len;
vec->i_len = len;
}
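The usual calling pattern for this pair is prepare, copy the payload, then finish; each formatted region now carries its own opheader and xlog_finish_iovec() fills oh_len from the payload length. The xlog_copy_iovec() helper elsewhere in this header wraps it roughly as below (reproduced from memory rather than from this diff, so treat the exact body as an approximation):

static inline void *
xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
		uint type, void *data, int len)
{
	void *buf;

	/* reserve an opheader plus an aligned payload region in the logvec */
	buf = xlog_prepare_iovec(lv, vecp, type);
	memcpy(buf, data, len);
	/* record the payload length in the opheader and the region accounting */
	xlog_finish_iovec(lv, *vecp, len);
	return buf;
}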
@@ -118,12 +95,8 @@ void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
struct xlog_ticket **ticket,
uint8_t clientid,
bool permanent);
int xfs_log_reserve(struct xfs_mount *mp, int length, int count,
struct xlog_ticket **ticket, bool permanent);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void xfs_log_unmount(struct xfs_mount *mp);
bool xfs_log_writable(struct xfs_mount *mp);
@@ -37,7 +37,7 @@ xlog_cil_ticket_alloc(
{
struct xlog_ticket *tic;

tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0);
tic = xlog_ticket_alloc(log, 0, 1, 0);

/*
* set the current reservation to zero so we know to steal the basic
@@ -214,13 +214,20 @@ xlog_cil_alloc_shadow_bufs(
}

/*
* We 64-bit align the length of each iovec so that the start
* of the next one is naturally aligned. We'll need to
* account for that slack space here. Then round nbytes up
* to 64-bit alignment so that the initial buffer alignment is
* easy to calculate and verify.
* We 64-bit align the length of each iovec so that the start of
* the next one is naturally aligned. We'll need to account for
* that slack space here.
*
* We also add the xlog_op_header to each region when
* formatting, but that's not accounted to the size of the item
* at this point. Hence we'll need an additional number of bytes
* for each vector to hold an opheader.
*
* Then round nbytes up to 64-bit alignment so that the initial
* buffer alignment is easy to calculate and verify.
*/
nbytes += niovecs * sizeof(uint64_t);
nbytes += niovecs *
(sizeof(uint64_t) + sizeof(struct xlog_op_header));
nbytes = round_up(nbytes, sizeof(uint64_t));

/*
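As a quick check of the new estimate, assuming sizeof(struct xlog_op_header) is 12 bytes (an assumption, not taken from this diff), a minimal standalone sketch of the before/after reservation for an item with three iovecs and 100 bytes of payload:

#include <stdio.h>

int main(void)
{
	unsigned niovecs = 3, payload = 100;
	unsigned ophdr = 12;	/* assumed sizeof(struct xlog_op_header) */

	unsigned old_est = payload + niovecs * 8;		/* 64-bit alignment slack only */
	unsigned new_est = payload + niovecs * (8 + ophdr);	/* slack plus one opheader per iovec */

	old_est = (old_est + 7) & ~7u;	/* round_up(nbytes, sizeof(uint64_t)) */
	new_est = (new_est + 7) & ~7u;

	printf("old estimate %u bytes, new estimate %u bytes\n", old_est, new_est);
	return 0;
}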
@@ -277,22 +284,18 @@ xlog_cil_alloc_shadow_bufs(

/*
* Prepare the log item for insertion into the CIL. Calculate the difference in
* log space and vectors it will consume, and if it is a new item pin it as
* well.
* log space it will consume, and if it is a new item pin it as well.
*/
STATIC void
xfs_cil_prepare_item(
struct xlog *log,
struct xfs_log_vec *lv,
struct xfs_log_vec *old_lv,
int *diff_len,
int *diff_iovecs)
int *diff_len)
{
/* Account for the new LV being passed in */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
*diff_len += lv->lv_bytes;
*diff_iovecs += lv->lv_niovecs;
}

/*
* If there is no old LV, this is the first time we've seen the item in
@@ -309,7 +312,6 @@ xfs_cil_prepare_item(
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);

*diff_len -= old_lv->lv_bytes;
*diff_iovecs -= old_lv->lv_niovecs;
lv->lv_item->li_lv_shadow = old_lv;
}

@@ -358,12 +360,10 @@ static void
xlog_cil_insert_format_items(
struct xlog *log,
struct xfs_trans *tp,
int *diff_len,
int *diff_iovecs)
int *diff_len)
{
struct xfs_log_item *lip;

/* Bail out if we didn't find a log item. */
if (list_empty(&tp->t_items)) {
ASSERT(0);
@@ -406,7 +406,6 @@ xlog_cil_insert_format_items(
* set the item up as though it is a new insertion so
* that the space reservation accounting is correct.
*/
*diff_iovecs -= lv->lv_niovecs;
*diff_len -= lv->lv_bytes;

/* Ensure the lv is set up according to ->iop_size */
@@ -431,7 +430,7 @@ xlog_cil_insert_format_items(
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
lip->li_ops->iop_format(lip, lv);
insert:
xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
xfs_cil_prepare_item(log, lv, old_lv, diff_len);
}
}

@@ -451,7 +450,6 @@ xlog_cil_insert_items(
struct xfs_cil_ctx *ctx = cil->xc_ctx;
struct xfs_log_item *lip;
int len = 0;
int diff_iovecs = 0;
int iclog_space;
int iovhdr_res = 0, split_res = 0, ctx_res = 0;

@@ -461,15 +459,10 @@ xlog_cil_insert_items(
* We can do this safely because the context can't checkpoint until we
* are done so it doesn't matter exactly how we update the CIL.
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
xlog_cil_insert_format_items(log, tp, &len);

spin_lock(&cil->xc_cil_lock);

/* account for space used by new iovec headers */
iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
len += iovhdr_res;
ctx->nvecs += diff_iovecs;

/* attach the transaction to the CIL if it has any busy extents */
if (!list_empty(&tp->t_busy))
list_splice_init(&tp->t_busy, &ctx->busy_extents);
@@ -822,7 +815,8 @@ restart:
static int
xlog_cil_write_chain(
struct xfs_cil_ctx *ctx,
struct xfs_log_vec *chain)
struct xfs_log_vec *chain,
uint32_t chain_len)
{
struct xlog *log = ctx->cil->xc_log;
int error;
@@ -830,7 +824,7 @@ xlog_cil_write_chain(
error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD);
if (error)
return error;
return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS);
return xlog_write(log, ctx, chain, ctx->ticket, chain_len);
}

/*
@@ -844,9 +838,14 @@ xlog_cil_write_commit_record(
struct xfs_cil_ctx *ctx)
{
struct xlog *log = ctx->cil->xc_log;
struct xlog_op_header ophdr = {
.oh_clientid = XFS_TRANSACTION,
.oh_tid = cpu_to_be32(ctx->ticket->t_tid),
.oh_flags = XLOG_COMMIT_TRANS,
};
struct xfs_log_iovec reg = {
.i_addr = NULL,
.i_len = 0,
.i_addr = &ophdr,
.i_len = sizeof(struct xlog_op_header),
.i_type = XLOG_REG_TYPE_COMMIT,
};
struct xfs_log_vec vec = {
@@ -862,12 +861,79 @@ xlog_cil_write_commit_record(
if (error)
return error;

error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS);
/* account for space used by record data */
ctx->ticket->t_curr_res -= reg.i_len;
error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len);
if (error)
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return error;
}

struct xlog_cil_trans_hdr {
struct xlog_op_header oph[2];
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr[2];
};

/*
* Build a checkpoint transaction header to begin the journal transaction. We
* need to account for the space used by the transaction header here as it is
* not accounted for in xlog_write().
*
* This is the only place we write a transaction header, so we also build the
* log opheaders that indicate the start of a log transaction and wrap the
* transaction header. We keep the start record in its own log vector rather
* than compacting them into a single region as this ends up making the logic
* in xlog_write() for handling empty opheaders for start, commit and unmount
* records much simpler.
*/
static void
xlog_cil_build_trans_hdr(
struct xfs_cil_ctx *ctx,
struct xlog_cil_trans_hdr *hdr,
struct xfs_log_vec *lvhdr,
int num_iovecs)
{
struct xlog_ticket *tic = ctx->ticket;
__be32 tid = cpu_to_be32(tic->t_tid);

memset(hdr, 0, sizeof(*hdr));

/* Log start record */
hdr->oph[0].oh_tid = tid;
hdr->oph[0].oh_clientid = XFS_TRANSACTION;
hdr->oph[0].oh_flags = XLOG_START_TRANS;

/* log iovec region pointer */
hdr->lhdr[0].i_addr = &hdr->oph[0];
hdr->lhdr[0].i_len = sizeof(struct xlog_op_header);
hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER;

/* log opheader */
hdr->oph[1].oh_tid = tid;
hdr->oph[1].oh_clientid = XFS_TRANSACTION;
hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header));

/* transaction header in host byte order format */
hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
hdr->thdr.th_type = XFS_TRANS_CHECKPOINT;
hdr->thdr.th_tid = tic->t_tid;
hdr->thdr.th_num_items = num_iovecs;

/* log iovec region pointer */
hdr->lhdr[1].i_addr = &hdr->oph[1];
hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) +
sizeof(struct xfs_trans_header);
hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR;

lvhdr->lv_niovecs = 2;
lvhdr->lv_iovecp = &hdr->lhdr[0];
lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
lvhdr->lv_next = ctx->lv_chain;

tic->t_curr_res -= lvhdr->lv_bytes;
}

/*
* Push the Committed Item List to the log.
*
@@ -892,11 +958,10 @@ xlog_cil_push_work(
struct xlog *log = cil->xc_log;
struct xfs_log_vec *lv;
struct xfs_cil_ctx *new_ctx;
struct xlog_ticket *tic;
int num_iovecs;
int num_iovecs = 0;
int num_bytes = 0;
int error = 0;
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr;
struct xlog_cil_trans_hdr thdr;
struct xfs_log_vec lvhdr = { NULL };
xfs_csn_t push_seq;
bool push_commit_stable;
@@ -975,7 +1040,6 @@ xlog_cil_push_work(
* by the flush lock.
*/
lv = NULL;
num_iovecs = 0;
while (!list_empty(&cil->xc_cil)) {
struct xfs_log_item *item;

@@ -989,6 +1053,10 @@ xlog_cil_push_work(
lv = item->li_lv;
item->li_lv = NULL;
num_iovecs += lv->lv_niovecs;

/* we don't write ordered log vectors */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
num_bytes += lv->lv_bytes;
}

@@ -1025,26 +1093,11 @@ xlog_cil_push_work(
* Build a checkpoint transaction header and write it to the log to
* begin the transaction. We need to account for the space used by the
* transaction header here as it is not accounted for in xlog_write().
*
* The LSN we need to pass to the log items on transaction commit is
* the LSN reported by the first log vector write. If we use the commit
* record lsn then we can move the tail beyond the grant write head.
*/
tic = ctx->ticket;
thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
thdr.th_type = XFS_TRANS_CHECKPOINT;
thdr.th_tid = tic->t_tid;
thdr.th_num_items = num_iovecs;
lhdr.i_addr = &thdr;
lhdr.i_len = sizeof(xfs_trans_header_t);
lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs);
num_bytes += lvhdr.lv_bytes;

lvhdr.lv_niovecs = 1;
lvhdr.lv_iovecp = &lhdr;
lvhdr.lv_next = ctx->lv_chain;

error = xlog_cil_write_chain(ctx, &lvhdr);
error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes);
if (error)
goto out_abort_free_ticket;

@@ -1052,7 +1105,7 @@ xlog_cil_push_work(
if (error)
goto out_abort_free_ticket;

xfs_log_ticket_ungrant(log, tic);
xfs_log_ticket_ungrant(log, ctx->ticket);

/*
* If the checkpoint spans multiple iclogs, wait for all previous iclogs
@@ -1116,7 +1169,7 @@ out_skip:
return;

out_abort_free_ticket:
xfs_log_ticket_ungrant(log, tic);
xfs_log_ticket_ungrant(log, ctx->ticket);
ASSERT(xlog_is_shutdown(log));
if (!ctx->commit_iclog) {
xlog_cil_committed(ctx);
@@ -142,19 +142,6 @@ enum xlog_iclog_state {

#define XLOG_COVER_OPS 5

/* Ticket reservation region accounting */
#define XLOG_TIC_LEN_MAX 15

/*
* Reservation region
* As would be stored in xfs_log_iovec but without the i_addr which
* we don't care about.
*/
typedef struct xlog_res {
uint r_len; /* region length :4 */
uint r_type; /* region's transaction type :4 */
} xlog_res_t;

typedef struct xlog_ticket {
struct list_head t_queue; /* reserve/write queue */
struct task_struct *t_task; /* task that owns this ticket */
@@ -164,15 +151,7 @@ typedef struct xlog_ticket {
int t_unit_res; /* unit reservation in bytes : 4 */
char t_ocnt; /* original count : 1 */
char t_cnt; /* current count : 1 */
char t_clientid; /* who does this belong to; : 1 */
uint8_t t_flags; /* properties of reservation : 1 */

/* reservation array fields */
uint t_res_num; /* num in array : 4 */
uint t_res_num_ophdrs; /* num op hdrs : 4 */
uint t_res_arr_sum; /* array sum : 4 */
uint t_res_o_flow; /* sum overflow : 4 */
xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
} xlog_ticket_t;

/*
@@ -211,7 +190,7 @@ typedef struct xlog_in_core {
u32 ic_offset;
enum xlog_iclog_state ic_state;
unsigned int ic_flags;
char *ic_datap; /* pointer to iclog data */
void *ic_datap; /* pointer to iclog data */
struct list_head ic_callbacks;

/* reference counts need their own cacheline */
@@ -242,7 +221,6 @@ struct xfs_cil_ctx {
xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
struct xlog_in_core *commit_iclog;
struct xlog_ticket *ticket; /* chkpt ticket */
int nvecs; /* number of regions */
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
@@ -441,10 +419,6 @@ struct xlog {

struct xfs_kobj l_kobj;

/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
/* log recovery lsn tracking (for buffer submission */
xfs_lsn_t l_recovery_lsn;

@@ -509,27 +483,14 @@ extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
char *dp, int size);

extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *
xlog_ticket_alloc(
struct xlog *log,
int unit_bytes,
int count,
char client,
bool permanent);

static inline void
xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
{
*ptr += bytes;
*len -= bytes;
*off += bytes;
}
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
int count, bool permanent);

void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
void xlog_print_trans(struct xfs_trans *);
int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
uint optype);
uint32_t len);
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);

@@ -194,11 +194,9 @@ xfs_trans_reserve(
ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
error = xfs_log_regrant(mp, tp->t_ticket);
} else {
error = xfs_log_reserve(mp,
resp->tr_logres,
error = xfs_log_reserve(mp, resp->tr_logres,
resp->tr_logcount,
&tp->t_ticket, XFS_TRANSACTION,
permanent);
&tp->t_ticket, permanent);
}

if (error)