Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
463260d7
Commit
463260d7
authored
Apr 21, 2022
by
Dave Chinner
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'guilt/xlog-write-rework' into xfs-5.19-for-next
parents
898a768f
593e3439
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
454 additions
and
588 deletions
+454
-588
fs/xfs/libxfs/xfs_log_format.h
fs/xfs/libxfs/xfs_log_format.h
+0
-1
fs/xfs/xfs_log.c
fs/xfs/xfs_log.c
+323
-441
fs/xfs/xfs_log.h
fs/xfs/xfs_log.h
+14
-41
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_cil.c
+111
-58
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_priv.h
+4
-43
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.c
+2
-4
No files found.
fs/xfs/libxfs/xfs_log_format.h
View file @
463260d7
...
...
@@ -69,7 +69,6 @@ static inline uint xlog_get_cycle(char *ptr)
/* Log Clients */
#define XFS_TRANSACTION 0x69
#define XFS_VOLUME 0x2
#define XFS_LOG 0xaa
#define XLOG_UNMOUNT_TYPE 0x556e
/* Un for Unmount */
...
...
fs/xfs/xfs_log.c
View file @
463260d7
...
...
@@ -49,7 +49,6 @@ xlog_state_get_iclog_space(
int
len
,
struct
xlog_in_core
**
iclog
,
struct
xlog_ticket
*
ticket
,
int
*
continued_write
,
int
*
logoffsetp
);
STATIC
void
xlog_grant_push_ail
(
...
...
@@ -61,10 +60,6 @@ xlog_sync(
struct
xlog_in_core
*
iclog
);
#if defined(DEBUG)
STATIC
void
xlog_verify_dest_ptr
(
struct
xlog
*
log
,
void
*
ptr
);
STATIC
void
xlog_verify_grant_tail
(
struct
xlog
*
log
);
STATIC
void
...
...
@@ -77,7 +72,6 @@ xlog_verify_tail_lsn(
struct
xlog
*
log
,
struct
xlog_in_core
*
iclog
);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b)
...
...
@@ -90,6 +84,62 @@ xlog_iclogs_empty(
static
int
xfs_log_cover
(
struct
xfs_mount
*
);
/*
 * Claim the next iovec slot of a log vector and return the buffer the caller
 * should copy its region data into.
 *
 * Each region in lv_buf is laid out as a struct xlog_op_header immediately
 * followed by the caller's payload. The pointer handed back must be naturally
 * aligned for the biggest basic data type stored in it (64 bits); the space
 * for that padding was already included when the buffer was sized.
 *
 * The op header is not a multiple of 8 bytes long, so it is the start of the
 * header that is moved: lv_buf_len is bumped until "header start + header
 * size" lands on an 8 byte boundary, leaving the payload returned to the
 * caller 8 byte aligned and packed against the tail of the header.
 *
 * The padding is never written to the log, which is why the space used by the
 * log vectors is tracked separately; otherwise the CIL context ticket would
 * leak log space through inaccurate accounting and could hang.
 *
 * The static parts of the op header (client id, reserved field, flags) are
 * initialised here. *vecp is updated to the slot that was claimed.
 */
void *
xlog_prepare_iovec(
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp,
	uint			type)
{
	struct xfs_log_iovec	*iov = *vecp;
	struct xlog_op_header	*hdr;
	uint32_t		payload_off;
	void			*payload;

	/* No previous slot means this is the first region of the vector. */
	if (!iov) {
		iov = &lv->lv_iovecp[0];
	} else {
		ASSERT(iov - lv->lv_iovecp < lv->lv_niovecs);
		iov++;
	}

	/* Pad the header start so the payload that follows is 64-bit aligned. */
	payload_off = lv->lv_buf_len + sizeof(*hdr);
	if (!IS_ALIGNED(payload_off, sizeof(uint64_t)))
		lv->lv_buf_len = round_up(payload_off, sizeof(uint64_t)) -
					sizeof(*hdr);

	iov->i_type = type;
	iov->i_addr = lv->lv_buf + lv->lv_buf_len;

	hdr = iov->i_addr;
	hdr->oh_flags = 0;
	hdr->oh_res2 = 0;
	hdr->oh_clientid = XFS_TRANSACTION;

	payload = iov->i_addr + sizeof(*hdr);
	ASSERT(IS_ALIGNED((unsigned long)payload, sizeof(uint64_t)));

	*vecp = iov;
	return payload;
}
static
void
xlog_grant_sub_space
(
struct
xlog
*
log
,
...
...
@@ -322,30 +372,6 @@ xlog_grant_head_check(
return
error
;
}
/*
 * Reset the region tracking state of a ticket: the number of recorded
 * regions, their running byte total and the op header count all go back to
 * zero. The overflow total (t_res_o_flow) is not touched here.
 */
static void
xlog_tic_reset_res(
	xlog_ticket_t		*tic)
{
	tic->t_res_num_ophdrs = 0;
	tic->t_res_arr_sum = 0;
	tic->t_res_num = 0;
}
/*
 * Record one region (length and type) in the ticket's region array and add
 * its length to the running total.
 *
 * When the array already holds XLOG_TIC_LEN_MAX entries, the running total is
 * folded into t_res_o_flow and recording restarts from slot 0.
 */
static void
xlog_tic_add_region(
	xlog_ticket_t		*tic,
	uint			len,
	uint			type)
{
	/* Array is full: move the sum to the overflow counter and start over. */
	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
		tic->t_res_o_flow += tic->t_res_arr_sum;
		tic->t_res_arr_sum = 0;
		tic->t_res_num = 0;
	}

	tic->t_res_arr_sum += len;
	tic->t_res_arr[tic->t_res_num].r_type = type;
	tic->t_res_arr[tic->t_res_num].r_len = len;
	tic->t_res_num++;
}
bool
xfs_log_writable
(
struct
xfs_mount
*
mp
)
...
...
@@ -395,8 +421,6 @@ xfs_log_regrant(
xlog_grant_push_ail
(
log
,
tic
->
t_unit_res
);
tic
->
t_curr_res
=
tic
->
t_unit_res
;
xlog_tic_reset_res
(
tic
);
if
(
tic
->
t_cnt
>
0
)
return
0
;
...
...
@@ -437,7 +461,6 @@ xfs_log_reserve(
int
unit_bytes
,
int
cnt
,
struct
xlog_ticket
**
ticp
,
uint8_t
client
,
bool
permanent
)
{
struct
xlog
*
log
=
mp
->
m_log
;
...
...
@@ -445,15 +468,13 @@ xfs_log_reserve(
int
need_bytes
;
int
error
=
0
;
ASSERT
(
client
==
XFS_TRANSACTION
||
client
==
XFS_LOG
);
if
(
xlog_is_shutdown
(
log
))
return
-
EIO
;
XFS_STATS_INC
(
mp
,
xs_try_logspace
);
ASSERT
(
*
ticp
==
NULL
);
tic
=
xlog_ticket_alloc
(
log
,
unit_bytes
,
cnt
,
client
,
permanent
);
tic
=
xlog_ticket_alloc
(
log
,
unit_bytes
,
cnt
,
permanent
);
*
ticp
=
tic
;
xlog_grant_push_ail
(
log
,
tic
->
t_cnt
?
tic
->
t_unit_res
*
tic
->
t_cnt
...
...
@@ -901,12 +922,22 @@ xlog_write_unmount_record(
struct
xlog
*
log
,
struct
xlog_ticket
*
ticket
)
{
struct
xfs_unmount_log_format
ulf
=
{
struct
{
struct
xlog_op_header
ophdr
;
struct
xfs_unmount_log_format
ulf
;
}
unmount_rec
=
{
.
ophdr
=
{
.
oh_clientid
=
XFS_LOG
,
.
oh_tid
=
cpu_to_be32
(
ticket
->
t_tid
),
.
oh_flags
=
XLOG_UNMOUNT_TRANS
,
},
.
ulf
=
{
.
magic
=
XLOG_UNMOUNT_TYPE
,
},
};
struct
xfs_log_iovec
reg
=
{
.
i_addr
=
&
u
lf
,
.
i_len
=
sizeof
(
u
lf
),
.
i_addr
=
&
u
nmount_rec
,
.
i_len
=
sizeof
(
u
nmount_rec
),
.
i_type
=
XLOG_REG_TYPE_UNMOUNT
,
};
struct
xfs_log_vec
vec
=
{
...
...
@@ -914,10 +945,14 @@ xlog_write_unmount_record(
.
lv_iovecp
=
&
reg
,
};
BUILD_BUG_ON
((
sizeof
(
struct
xlog_op_header
)
+
sizeof
(
struct
xfs_unmount_log_format
))
!=
sizeof
(
unmount_rec
));
/* account for space used by record data */
ticket
->
t_curr_res
-=
sizeof
(
u
lf
);
ticket
->
t_curr_res
-=
sizeof
(
u
nmount_rec
);
return
xlog_write
(
log
,
NULL
,
&
vec
,
ticket
,
XLOG_UNMOUNT_TRANS
);
return
xlog_write
(
log
,
NULL
,
&
vec
,
ticket
,
reg
.
i_len
);
}
/*
...
...
@@ -933,7 +968,7 @@ xlog_unmount_write(
struct
xlog_ticket
*
tic
=
NULL
;
int
error
;
error
=
xfs_log_reserve
(
mp
,
600
,
1
,
&
tic
,
XFS_LOG
,
0
);
error
=
xfs_log_reserve
(
mp
,
600
,
1
,
&
tic
,
0
);
if
(
error
)
goto
out_err
;
...
...
@@ -1584,9 +1619,6 @@ xlog_alloc_log(
GFP_KERNEL
|
__GFP_RETRY_MAYFAIL
);
if
(
!
iclog
->
ic_data
)
goto
out_free_iclog
;
#ifdef DEBUG
log
->
l_iclog_bak
[
i
]
=
&
iclog
->
ic_header
;
#endif
head
=
&
iclog
->
ic_header
;
memset
(
head
,
0
,
sizeof
(
xlog_rec_header_t
));
head
->
h_magicno
=
cpu_to_be32
(
XLOG_HEADER_MAGIC_NUM
);
...
...
@@ -1602,7 +1634,7 @@ xlog_alloc_log(
iclog
->
ic_log
=
log
;
atomic_set
(
&
iclog
->
ic_refcnt
,
0
);
INIT_LIST_HEAD
(
&
iclog
->
ic_callbacks
);
iclog
->
ic_datap
=
(
char
*
)
iclog
->
ic_data
+
log
->
l_iclog_hsize
;
iclog
->
ic_datap
=
(
void
*
)
iclog
->
ic_data
+
log
->
l_iclog_hsize
;
init_waitqueue_head
(
&
iclog
->
ic_force_wait
);
init_waitqueue_head
(
&
iclog
->
ic_write_wait
);
...
...
@@ -2111,63 +2143,11 @@ xlog_print_tic_res(
struct
xfs_mount
*
mp
,
struct
xlog_ticket
*
ticket
)
{
uint
i
;
uint
ophdr_spc
=
ticket
->
t_res_num_ophdrs
*
(
uint
)
sizeof
(
xlog_op_header_t
);
/* match with XLOG_REG_TYPE_* in xfs_log.h */
#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
static
char
*
res_type_str
[]
=
{
REG_TYPE_STR
(
BFORMAT
,
"bformat"
),
REG_TYPE_STR
(
BCHUNK
,
"bchunk"
),
REG_TYPE_STR
(
EFI_FORMAT
,
"efi_format"
),
REG_TYPE_STR
(
EFD_FORMAT
,
"efd_format"
),
REG_TYPE_STR
(
IFORMAT
,
"iformat"
),
REG_TYPE_STR
(
ICORE
,
"icore"
),
REG_TYPE_STR
(
IEXT
,
"iext"
),
REG_TYPE_STR
(
IBROOT
,
"ibroot"
),
REG_TYPE_STR
(
ILOCAL
,
"ilocal"
),
REG_TYPE_STR
(
IATTR_EXT
,
"iattr_ext"
),
REG_TYPE_STR
(
IATTR_BROOT
,
"iattr_broot"
),
REG_TYPE_STR
(
IATTR_LOCAL
,
"iattr_local"
),
REG_TYPE_STR
(
QFORMAT
,
"qformat"
),
REG_TYPE_STR
(
DQUOT
,
"dquot"
),
REG_TYPE_STR
(
QUOTAOFF
,
"quotaoff"
),
REG_TYPE_STR
(
LRHEADER
,
"LR header"
),
REG_TYPE_STR
(
UNMOUNT
,
"unmount"
),
REG_TYPE_STR
(
COMMIT
,
"commit"
),
REG_TYPE_STR
(
TRANSHDR
,
"trans header"
),
REG_TYPE_STR
(
ICREATE
,
"inode create"
),
REG_TYPE_STR
(
RUI_FORMAT
,
"rui_format"
),
REG_TYPE_STR
(
RUD_FORMAT
,
"rud_format"
),
REG_TYPE_STR
(
CUI_FORMAT
,
"cui_format"
),
REG_TYPE_STR
(
CUD_FORMAT
,
"cud_format"
),
REG_TYPE_STR
(
BUI_FORMAT
,
"bui_format"
),
REG_TYPE_STR
(
BUD_FORMAT
,
"bud_format"
),
};
BUILD_BUG_ON
(
ARRAY_SIZE
(
res_type_str
)
!=
XLOG_REG_TYPE_MAX
+
1
);
#undef REG_TYPE_STR
xfs_warn
(
mp
,
"ticket reservation summary:"
);
xfs_warn
(
mp
,
" unit res = %d bytes"
,
ticket
->
t_unit_res
);
xfs_warn
(
mp
,
" current res = %d bytes"
,
ticket
->
t_curr_res
);
xfs_warn
(
mp
,
" total reg = %u bytes (o/flow = %u bytes)"
,
ticket
->
t_res_arr_sum
,
ticket
->
t_res_o_flow
);
xfs_warn
(
mp
,
" ophdrs = %u (ophdr space = %u bytes)"
,
ticket
->
t_res_num_ophdrs
,
ophdr_spc
);
xfs_warn
(
mp
,
" ophdr + reg = %u bytes"
,
ticket
->
t_res_arr_sum
+
ticket
->
t_res_o_flow
+
ophdr_spc
);
xfs_warn
(
mp
,
" num regions = %u"
,
ticket
->
t_res_num
);
for
(
i
=
0
;
i
<
ticket
->
t_res_num
;
i
++
)
{
uint
r_type
=
ticket
->
t_res_arr
[
i
].
r_type
;
xfs_warn
(
mp
,
"region[%u]: %s - %u bytes"
,
i
,
((
r_type
<=
0
||
r_type
>
XLOG_REG_TYPE_MAX
)
?
"bad-rtype"
:
res_type_str
[
r_type
]),
ticket
->
t_res_arr
[
i
].
r_len
);
}
xfs_warn
(
mp
,
" unit res = %d bytes"
,
ticket
->
t_unit_res
);
xfs_warn
(
mp
,
" current res = %d bytes"
,
ticket
->
t_curr_res
);
xfs_warn
(
mp
,
" original count = %d"
,
ticket
->
t_ocnt
);
xfs_warn
(
mp
,
" remaining count = %d"
,
ticket
->
t_cnt
);
}
/*
...
...
@@ -2220,187 +2200,226 @@ xlog_print_trans(
}
}
/*
 * Copy write_len bytes from data into the iclog data area at *log_offset and
 * update the caller's bookkeeping: the offset and data byte count advance by
 * write_len, the bytes still to be written drop by write_len, and the record
 * count goes up by one.
 *
 * Both the offset and the length are expected to be 32-bit aligned.
 */
static inline void
xlog_write_iovec(
	struct xlog_in_core	*iclog,
	uint32_t		*log_offset,
	void			*data,
	uint32_t		write_len,
	int			*bytes_left,
	uint32_t		*record_cnt,
	uint32_t		*data_cnt)
{
	uint32_t		off = *log_offset;

	ASSERT(off < iclog->ic_log->l_iclog_size);
	ASSERT(off % sizeof(int32_t) == 0);
	ASSERT(write_len % sizeof(int32_t) == 0);

	memcpy(iclog->ic_datap + off, data, write_len);

	*log_offset = off + write_len;
	*data_cnt += write_len;
	*record_cnt += 1;
	*bytes_left -= write_len;
}
/*
* Calculate the potential space needed by the log vector. We may need a start
* record, and each region gets its own struct xlog_op_header and may need to be
* double word aligned.
* Write log vectors into a single iclog which is guaranteed by the caller
* to have enough space to write the entire log vector into.
*/
static
int
xlog_write_calc_vec_length
(
static
void
xlog_write_full
(
struct
xfs_log_vec
*
lv
,
struct
xlog_ticket
*
ticket
,
struct
xfs_log_vec
*
log_vector
,
uint
optype
)
struct
xlog_in_core
*
iclog
,
uint32_t
*
log_offset
,
uint32_t
*
len
,
uint32_t
*
record_cnt
,
uint32_t
*
data_cnt
)
{
struct
xfs_log_vec
*
lv
;
int
headers
=
0
;
int
len
=
0
;
int
i
;
int
index
;
if
(
optype
&
XLOG_START_TRANS
)
headers
++
;
ASSERT
(
*
log_offset
+
*
len
<=
iclog
->
ic_size
||
iclog
->
ic_state
==
XLOG_STATE_WANT_SYNC
)
;
for
(
lv
=
log_vector
;
lv
;
lv
=
lv
->
lv_next
)
{
/* we don't write ordered log vectors */
if
(
lv
->
lv_buf_len
==
XFS_LOG_VEC_ORDERED
)
continue
;
headers
+=
lv
->
lv_niovecs
;
for
(
i
=
0
;
i
<
lv
->
lv_niovecs
;
i
++
)
{
struct
xfs_log_iovec
*
vecp
=
&
lv
->
lv_iovecp
[
i
];
len
+=
vecp
->
i_len
;
xlog_tic_add_region
(
ticket
,
vecp
->
i_len
,
vecp
->
i_type
);
}
}
ticket
->
t_res_num_ophdrs
+=
headers
;
len
+=
headers
*
sizeof
(
struct
xlog_op_header
);
return
len
;
}
/*
* Ordered log vectors have no regions to write so this
* loop will naturally skip them.
*/
for
(
index
=
0
;
index
<
lv
->
lv_niovecs
;
index
++
)
{
struct
xfs_log_iovec
*
reg
=
&
lv
->
lv_iovecp
[
index
];
struct
xlog_op_header
*
ophdr
=
reg
->
i_addr
;
static
void
xlog_write_start_rec
(
struct
xlog_op_header
*
ophdr
,
struct
xlog_ticket
*
ticket
)
{
ophdr
->
oh_tid
=
cpu_to_be32
(
ticket
->
t_tid
);
ophdr
->
oh_clientid
=
ticket
->
t_clientid
;
ophdr
->
oh_len
=
0
;
ophdr
->
oh_flags
=
XLOG_START_TRANS
;
ophdr
->
oh_res2
=
0
;
xlog_write_iovec
(
iclog
,
log_offset
,
reg
->
i_addr
,
reg
->
i_len
,
len
,
record_cnt
,
data_cnt
);
}
}
static
xlog_op_header_t
*
xlog_write_setup_ophdr
(
struct
xlog
*
log
,
struct
xlog_op_header
*
ophdr
,
static
int
xlog_write_get_more_iclog_space
(
struct
xlog_ticket
*
ticket
,
uint
flags
)
struct
xlog_in_core
**
iclogp
,
uint32_t
*
log_offset
,
uint32_t
len
,
uint32_t
*
record_cnt
,
uint32_t
*
data_cnt
)
{
ophdr
->
oh_tid
=
cpu_to_be32
(
ticket
->
t_tid
);
ophdr
->
oh_clientid
=
ticket
->
t_clientid
;
ophdr
->
oh_res2
=
0
;
/* are we copying a commit or unmount record? */
ophdr
->
oh_flags
=
flags
;
struct
xlog_in_core
*
iclog
=
*
iclogp
;
struct
xlog
*
log
=
iclog
->
ic_log
;
int
error
;
/*
* We've seen logs corrupted with bad transaction client ids. This
* makes sure that XFS doesn't generate them. Turn this into an EIO
* and shut down the filesystem.
*/
switch
(
ophdr
->
oh_clientid
)
{
case
XFS_TRANSACTION
:
case
XFS_VOLUME
:
case
XFS_LOG
:
break
;
default:
xfs_warn
(
log
->
l_mp
,
"Bad XFS transaction clientid 0x%x in ticket "
PTR_FMT
,
ophdr
->
oh_clientid
,
ticket
);
return
NULL
;
}
spin_lock
(
&
log
->
l_icloglock
);
ASSERT
(
iclog
->
ic_state
==
XLOG_STATE_WANT_SYNC
);
xlog_state_finish_copy
(
log
,
iclog
,
*
record_cnt
,
*
data_cnt
);
error
=
xlog_state_release_iclog
(
log
,
iclog
);
spin_unlock
(
&
log
->
l_icloglock
);
if
(
error
)
return
error
;
return
ophdr
;
error
=
xlog_state_get_iclog_space
(
log
,
len
,
&
iclog
,
ticket
,
log_offset
);
if
(
error
)
return
error
;
*
record_cnt
=
0
;
*
data_cnt
=
0
;
*
iclogp
=
iclog
;
return
0
;
}
/*
*
Set up the parameters of the region copy into the log. This has
*
to handle region write split across multiple log buffers - this
*
state is kept external to this function so that this code can
*
be written in an obvious, self documenting manner
.
*
Write log vectors into a single iclog which is smaller than the current chain
*
length. We write until we cannot fit a full record into the remaining space
*
and then stop. We return the log vector that is to be written that cannot
*
wholly fit in the iclog
.
*/
static
int
xlog_write_setup_copy
(
xlog_write_partial
(
struct
xfs_log_vec
*
lv
,
struct
xlog_ticket
*
ticket
,
struct
xlog_op_header
*
ophdr
,
int
space_available
,
int
space_required
,
int
*
copy_off
,
int
*
copy_len
,
int
*
last_was_partial_copy
,
int
*
bytes_consumed
)
{
int
still_to_copy
;
still_to_copy
=
space_required
-
*
bytes_consumed
;
*
copy_off
=
*
bytes_consumed
;
if
(
still_to_copy
<=
space_available
)
{
/* write of region completes here */
*
copy_len
=
still_to_copy
;
ophdr
->
oh_len
=
cpu_to_be32
(
*
copy_len
);
if
(
*
last_was_partial_copy
)
ophdr
->
oh_flags
|=
(
XLOG_END_TRANS
|
XLOG_WAS_CONT_TRANS
);
*
last_was_partial_copy
=
0
;
*
bytes_consumed
=
0
;
return
0
;
struct
xlog_in_core
**
iclogp
,
uint32_t
*
log_offset
,
uint32_t
*
len
,
uint32_t
*
record_cnt
,
uint32_t
*
data_cnt
)
{
struct
xlog_in_core
*
iclog
=
*
iclogp
;
struct
xlog_op_header
*
ophdr
;
int
index
=
0
;
uint32_t
rlen
;
int
error
;
/* walk the logvec, copying until we run out of space in the iclog */
for
(
index
=
0
;
index
<
lv
->
lv_niovecs
;
index
++
)
{
struct
xfs_log_iovec
*
reg
=
&
lv
->
lv_iovecp
[
index
];
uint32_t
reg_offset
=
0
;
/*
* The first region of a continuation must have a non-zero
* length otherwise log recovery will just skip over it and
* start recovering from the next opheader it finds. Because we
* mark the next opheader as a continuation, recovery will then
* incorrectly add the continuation to the previous region and
* that breaks stuff.
*
* Hence if there isn't space for region data after the
* opheader, then we need to start afresh with a new iclog.
*/
if
(
iclog
->
ic_size
-
*
log_offset
<=
sizeof
(
struct
xlog_op_header
))
{
error
=
xlog_write_get_more_iclog_space
(
ticket
,
&
iclog
,
log_offset
,
*
len
,
record_cnt
,
data_cnt
);
if
(
error
)
return
error
;
}
/* partial write of region, needs extra log op header reservation */
*
copy_len
=
space_available
;
ophdr
->
oh_len
=
cpu_to_be32
(
*
copy_len
);
ophdr
=
reg
->
i_addr
;
rlen
=
min_t
(
uint32_t
,
reg
->
i_len
,
iclog
->
ic_size
-
*
log_offset
);
ophdr
->
oh_tid
=
cpu_to_be32
(
ticket
->
t_tid
);
ophdr
->
oh_len
=
cpu_to_be32
(
rlen
-
sizeof
(
struct
xlog_op_header
));
if
(
rlen
!=
reg
->
i_len
)
ophdr
->
oh_flags
|=
XLOG_CONTINUE_TRANS
;
if
(
*
last_was_partial_copy
)
ophdr
->
oh_flags
|=
XLOG_WAS_CONT_TRANS
;
*
bytes_consumed
+=
*
copy_len
;
(
*
last_was_partial_copy
)
++
;
/* account for new log op header */
ticket
->
t_curr_res
-=
sizeof
(
struct
xlog_op_header
);
ticket
->
t_res_num_ophdrs
++
;
xlog_write_iovec
(
iclog
,
log_offset
,
reg
->
i_addr
,
rlen
,
len
,
record_cnt
,
data_cnt
);
return
sizeof
(
struct
xlog_op_header
);
}
/* If we wrote the whole region, move to the next. */
if
(
rlen
==
reg
->
i_len
)
continue
;
static
int
xlog_write_copy_finish
(
struct
xlog
*
log
,
struct
xlog_in_core
*
iclog
,
uint
flags
,
int
*
record_cnt
,
int
*
data_cnt
,
int
*
partial_copy
,
int
*
partial_copy_len
,
int
log_offset
)
{
int
error
;
/*
* We now have a partially written iovec, but it can span
* multiple iclogs so we loop here. First we release the iclog
* we currently have, then we get a new iclog and add a new
* opheader. Then we continue copying from where we were until
* we either complete the iovec or fill the iclog. If we
* complete the iovec, then we increment the index and go right
* back to the top of the outer loop. If we fill the iclog, we
* run the inner loop again.
*
* This is complicated by the tail of a region using all the
* space in an iclog and hence requiring us to release the iclog
* and get a new one before returning to the outer loop. We must
* always guarantee that we exit this inner loop with at least
* space for log transaction opheaders left in the current
* iclog, hence we cannot just terminate the loop at the end
* of the continuation. So we loop while there is no
* space left in the current iclog, and check for the end of the
* continuation after getting a new iclog.
*/
do
{
/*
* Ensure we include the continuation opheader in the
* space we need in the new iclog by adding that size
* to the length we require. This continuation opheader
* needs to be accounted to the ticket as the space it
* consumes hasn't been accounted to the lv we are
* writing.
*/
error
=
xlog_write_get_more_iclog_space
(
ticket
,
&
iclog
,
log_offset
,
*
len
+
sizeof
(
struct
xlog_op_header
),
record_cnt
,
data_cnt
);
if
(
error
)
return
error
;
ophdr
=
iclog
->
ic_datap
+
*
log_offset
;
ophdr
->
oh_tid
=
cpu_to_be32
(
ticket
->
t_tid
);
ophdr
->
oh_clientid
=
XFS_TRANSACTION
;
ophdr
->
oh_res2
=
0
;
ophdr
->
oh_flags
=
XLOG_WAS_CONT_TRANS
;
ticket
->
t_curr_res
-=
sizeof
(
struct
xlog_op_header
);
*
log_offset
+=
sizeof
(
struct
xlog_op_header
);
*
data_cnt
+=
sizeof
(
struct
xlog_op_header
);
if
(
*
partial_copy
)
{
/*
* This iclog has already been marked WANT_SYNC by
* xlog_state_get_iclog_space
.
* If rlen fits in the iclog, then end the region
* continuation. Otherwise we're going around again
.
*/
spin_lock
(
&
log
->
l_icloglock
)
;
xlog_state_finish_copy
(
log
,
iclog
,
*
record_cnt
,
*
data_cnt
)
;
*
record_cnt
=
0
;
*
data_cnt
=
0
;
goto
release_iclog
;
}
reg_offset
+=
rlen
;
rlen
=
reg
->
i_len
-
reg_offset
;
if
(
rlen
<=
iclog
->
ic_size
-
*
log_offset
)
ophdr
->
oh_flags
|=
XLOG_END_TRANS
;
else
ophdr
->
oh_flags
|=
XLOG_CONTINUE_TRANS
;
*
partial_copy
=
0
;
*
partial_copy_len
=
0
;
rlen
=
min_t
(
uint32_t
,
rlen
,
iclog
->
ic_size
-
*
log_offset
)
;
ophdr
->
oh_len
=
cpu_to_be32
(
rlen
)
;
if
(
iclog
->
ic_size
-
log_offset
>
sizeof
(
xlog_op_header_t
))
return
0
;
xlog_write_iovec
(
iclog
,
log_offset
,
reg
->
i_addr
+
reg_offset
,
rlen
,
len
,
record_cnt
,
data_cnt
);
/* no more space in this iclog - push it. */
spin_lock
(
&
log
->
l_icloglock
);
xlog_state_finish_copy
(
log
,
iclog
,
*
record_cnt
,
*
data_cnt
);
*
record_cnt
=
0
;
*
data_cnt
=
0
;
}
while
(
ophdr
->
oh_flags
&
XLOG_CONTINUE_TRANS
);
}
if
(
iclog
->
ic_state
==
XLOG_STATE_ACTIVE
)
xlog_state_switch_iclogs
(
log
,
iclog
,
0
);
else
ASSERT
(
iclog
->
ic_state
==
XLOG_STATE_WANT_SYNC
||
xlog_is_shutdown
(
log
));
release_iclog:
error
=
xlog_state_release_iclog
(
log
,
iclog
);
spin_unlock
(
&
log
->
l_icloglock
);
return
error
;
/*
* No more iovecs remain in this logvec so return the next log vec to
* the caller so it can go back to fast path copying.
*/
*
iclogp
=
iclog
;
return
0
;
}
/*
...
...
@@ -2449,27 +2468,16 @@ xlog_write(
struct
xfs_cil_ctx
*
ctx
,
struct
xfs_log_vec
*
log_vector
,
struct
xlog_ticket
*
ticket
,
uint
optype
)
uint32_t
len
)
{
struct
xlog_in_core
*
iclog
=
NULL
;
struct
xfs_log_vec
*
lv
=
log_vector
;
struct
xfs_log_iovec
*
vecp
=
lv
->
lv_iovecp
;
int
index
=
0
;
int
len
;
int
partial_copy
=
0
;
int
partial_copy_len
=
0
;
int
contwr
=
0
;
int
record_cnt
=
0
;
int
data_cnt
=
0
;
uint32_t
record_cnt
=
0
;
uint32_t
data_cnt
=
0
;
int
error
=
0
;
int
log_offset
;
/*
* If this is a commit or unmount transaction, we don't need a start
* record to be written. We do, however, have to account for the
* commit or unmount header that gets written. Hence we always have
* to account for an extra xlog_op_header here.
*/
ticket
->
t_curr_res
-=
sizeof
(
struct
xlog_op_header
);
if
(
ticket
->
t_curr_res
<
0
)
{
xfs_alert_tag
(
log
->
l_mp
,
XFS_PTAG_LOGRES
,
"ctx ticket reservation ran out. Need to up reservation"
);
...
...
@@ -2477,144 +2485,54 @@ xlog_write(
xlog_force_shutdown
(
log
,
SHUTDOWN_LOG_IO_ERROR
);
}
len
=
xlog_write_calc_vec_length
(
ticket
,
log_vector
,
optype
);
while
(
lv
&&
(
!
lv
->
lv_niovecs
||
index
<
lv
->
lv_niovecs
))
{
void
*
ptr
;
int
log_offset
;
error
=
xlog_state_get_iclog_space
(
log
,
len
,
&
iclog
,
ticket
,
&
contwr
,
&
log_offset
);
&
log_offset
);
if
(
error
)
return
error
;
ASSERT
(
log_offset
<=
iclog
->
ic_size
-
1
);
ptr
=
iclog
->
ic_datap
+
log_offset
;
/*
* If we have a context pointer, pass it the first iclog we are
* writing to so it can record state needed for iclog write
* ordering.
*/
if
(
ctx
)
{
if
(
ctx
)
xlog_cil_set_ctx_write_state
(
ctx
,
iclog
);
ctx
=
NULL
;
}
while
(
lv
)
{
/*
*
This loop writes out as many regions as can fit in the amount
*
of space which was allocated by xlog_state_get_iclog_space()
.
*
If the entire log vec does not fit in the iclog, punt it to
*
the partial copy loop which can handle this case
.
*/
while
(
lv
&&
(
!
lv
->
lv_niovecs
||
index
<
lv
->
lv_niovecs
))
{
struct
xfs_log_iovec
*
reg
;
struct
xlog_op_header
*
ophdr
;
int
copy_len
;
int
copy_off
;
bool
ordered
=
false
;
bool
wrote_start_rec
=
false
;
/* ordered log vectors have no regions to write */
if
(
lv
->
lv_buf_len
==
XFS_LOG_VEC_ORDERED
)
{
ASSERT
(
lv
->
lv_niovecs
==
0
);
ordered
=
true
;
goto
next_lv
;
}
reg
=
&
vecp
[
index
];
ASSERT
(
reg
->
i_len
%
sizeof
(
int32_t
)
==
0
);
ASSERT
((
unsigned
long
)
ptr
%
sizeof
(
int32_t
)
==
0
);
if
(
lv
->
lv_niovecs
&&
lv
->
lv_bytes
>
iclog
->
ic_size
-
log_offset
)
{
error
=
xlog_write_partial
(
lv
,
ticket
,
&
iclog
,
&
log_offset
,
&
len
,
&
record_cnt
,
&
data_cnt
);
if
(
error
)
{
/*
* Before we start formatting log vectors, we need to
* write a start record. Only do this for the first
* iclog we write to.
* We have no iclog to release, so just return
* the error immediately.
*/
if
(
optype
&
XLOG_START_TRANS
)
{
xlog_write_start_rec
(
ptr
,
ticket
);
xlog_write_adv_cnt
(
&
ptr
,
&
len
,
&
log_offset
,
sizeof
(
struct
xlog_op_header
));
optype
&=
~
XLOG_START_TRANS
;
wrote_start_rec
=
true
;
}
ophdr
=
xlog_write_setup_ophdr
(
log
,
ptr
,
ticket
,
optype
);
if
(
!
ophdr
)
return
-
EIO
;
xlog_write_adv_cnt
(
&
ptr
,
&
len
,
&
log_offset
,
sizeof
(
struct
xlog_op_header
));
len
+=
xlog_write_setup_copy
(
ticket
,
ophdr
,
iclog
->
ic_size
-
log_offset
,
reg
->
i_len
,
&
copy_off
,
&
copy_len
,
&
partial_copy
,
&
partial_copy_len
);
xlog_verify_dest_ptr
(
log
,
ptr
);
/*
* Copy region.
*
* Unmount records just log an opheader, so can have
* empty payloads with no data region to copy. Hence we
* only copy the payload if the vector says it has data
* to copy.
*/
ASSERT
(
copy_len
>=
0
);
if
(
copy_len
>
0
)
{
memcpy
(
ptr
,
reg
->
i_addr
+
copy_off
,
copy_len
);
xlog_write_adv_cnt
(
&
ptr
,
&
len
,
&
log_offset
,
copy_len
);
}
copy_len
+=
sizeof
(
struct
xlog_op_header
);
record_cnt
++
;
if
(
wrote_start_rec
)
{
copy_len
+=
sizeof
(
struct
xlog_op_header
);
record_cnt
++
;
}
data_cnt
+=
contwr
?
copy_len
:
0
;
error
=
xlog_write_copy_finish
(
log
,
iclog
,
optype
,
&
record_cnt
,
&
data_cnt
,
&
partial_copy
,
&
partial_copy_len
,
log_offset
);
if
(
error
)
return
error
;
/*
* if we had a partial copy, we need to get more iclog
* space but we don't want to increment the region
* index because there is still more in this region to
* write.
*
* If we completed writing this region, and we flushed
* the iclog (indicated by resetting of the record
* count), then we also need to get more log space. If
* this was the last record, though, we are done and
* can just return.
*/
if
(
partial_copy
)
break
;
if
(
++
index
==
lv
->
lv_niovecs
)
{
next_lv:
lv
=
lv
->
lv_next
;
index
=
0
;
if
(
lv
)
vecp
=
lv
->
lv_iovecp
;
}
if
(
record_cnt
==
0
&&
!
ordered
)
{
if
(
!
lv
)
return
0
;
break
;
}
}
else
{
xlog_write_full
(
lv
,
ticket
,
iclog
,
&
log_offset
,
&
len
,
&
record_cnt
,
&
data_cnt
);
}
lv
=
lv
->
lv_next
;
}
ASSERT
(
len
==
0
);
/*
* We've already been guaranteed that the last writes will fit inside
* the current iclog, and hence it will already have the space used by
* those writes accounted to it. Hence we do not need to update the
* iclog with the number of bytes written here.
*/
spin_lock
(
&
log
->
l_icloglock
);
xlog_state_finish_copy
(
log
,
iclog
,
record_cnt
,
data_cnt
);
xlog_state_finish_copy
(
log
,
iclog
,
record_cnt
,
0
);
error
=
xlog_state_release_iclog
(
log
,
iclog
);
spin_unlock
(
&
log
->
l_icloglock
);
...
...
@@ -2971,7 +2889,6 @@ xlog_state_get_iclog_space(
int
len
,
struct
xlog_in_core
**
iclogp
,
struct
xlog_ticket
*
ticket
,
int
*
continued_write
,
int
*
logoffsetp
)
{
int
log_offset
;
...
...
@@ -3008,9 +2925,6 @@ xlog_state_get_iclog_space(
*/
if
(
log_offset
==
0
)
{
ticket
->
t_curr_res
-=
log
->
l_iclog_hsize
;
xlog_tic_add_region
(
ticket
,
log
->
l_iclog_hsize
,
XLOG_REG_TYPE_LRHEADER
);
head
->
h_cycle
=
cpu_to_be32
(
log
->
l_curr_cycle
);
head
->
h_lsn
=
cpu_to_be64
(
xlog_assign_lsn
(
log
->
l_curr_cycle
,
log
->
l_curr_block
));
...
...
@@ -3052,13 +2966,10 @@ xlog_state_get_iclog_space(
* iclogs (to mark it taken), this particular iclog will release/sync
* to disk in xlog_write().
*/
if
(
len
<=
iclog
->
ic_size
-
iclog
->
ic_offset
)
{
*
continued_write
=
0
;
if
(
len
<=
iclog
->
ic_size
-
iclog
->
ic_offset
)
iclog
->
ic_offset
+=
len
;
}
else
{
*
continued_write
=
1
;
else
xlog_state_switch_iclogs
(
log
,
iclog
,
iclog
->
ic_size
);
}
*
iclogp
=
iclog
;
ASSERT
(
iclog
->
ic_offset
<=
iclog
->
ic_size
);
...
...
@@ -3090,7 +3001,6 @@ xfs_log_ticket_regrant(
xlog_grant_sub_space
(
log
,
&
log
->
l_write_head
.
grant
,
ticket
->
t_curr_res
);
ticket
->
t_curr_res
=
ticket
->
t_unit_res
;
xlog_tic_reset_res
(
ticket
);
trace_xfs_log_ticket_regrant_sub
(
log
,
ticket
);
...
...
@@ -3101,7 +3011,6 @@ xfs_log_ticket_regrant(
trace_xfs_log_ticket_regrant_exit
(
log
,
ticket
);
ticket
->
t_curr_res
=
ticket
->
t_unit_res
;
xlog_tic_reset_res
(
ticket
);
}
xfs_log_ticket_put
(
ticket
);
...
...
@@ -3591,7 +3500,6 @@ xlog_ticket_alloc(
struct
xlog
*
log
,
int
unit_bytes
,
int
cnt
,
char
client
,
bool
permanent
)
{
struct
xlog_ticket
*
tic
;
...
...
@@ -3609,39 +3517,13 @@ xlog_ticket_alloc(
tic
->
t_cnt
=
cnt
;
tic
->
t_ocnt
=
cnt
;
tic
->
t_tid
=
prandom_u32
();
tic
->
t_clientid
=
client
;
if
(
permanent
)
tic
->
t_flags
|=
XLOG_TIC_PERM_RESERV
;
xlog_tic_reset_res
(
tic
);
return
tic
;
}
#if defined(DEBUG)
/*
 * Debug check that a destination pointer lies inside the valid data region of
 * one of the iclogs. The comparison uses the backup iclog pointers
 * (l_iclog_bak), which are stored in a different part of the log in case the
 * log structure itself has been trashed. An emergency message is logged when
 * the pointer matches none of the iclogs.
 */
STATIC void
xlog_verify_dest_ptr(
	struct xlog	*log,
	void		*ptr)
{
	int		idx;

	for (idx = 0; idx < log->l_iclog_bufs; idx++) {
		if (ptr < log->l_iclog_bak[idx] ||
		    ptr > log->l_iclog_bak[idx] + log->l_iclog_size)
			continue;

		/* Falls within this iclog; nothing to report. */
		return;
	}

	xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
}
/*
* Check to make sure the grant write head didn't just over lap the tail. If
* the cycles are the same, we can't be overlapping. Otherwise, make sure that
...
...
@@ -3769,7 +3651,7 @@ xlog_verify_iclog(
if
(
field_offset
&
0x1ff
)
{
clientid
=
ophead
->
oh_clientid
;
}
else
{
idx
=
BTOBBT
((
char
*
)
&
ophead
->
oh_clientid
-
iclog
->
ic_datap
);
idx
=
BTOBBT
((
void
*
)
&
ophead
->
oh_clientid
-
iclog
->
ic_datap
);
if
(
idx
>=
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
))
{
j
=
idx
/
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
);
k
=
idx
%
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
);
...
...
@@ -3780,11 +3662,12 @@ xlog_verify_iclog(
iclog
->
ic_header
.
h_cycle_data
[
idx
]);
}
}
if
(
clientid
!=
XFS_TRANSACTION
&&
clientid
!=
XFS_LOG
)
if
(
clientid
!=
XFS_TRANSACTION
&&
clientid
!=
XFS_LOG
)
{
xfs_warn
(
log
->
l_mp
,
"%s: invalid clientid %d op "
PTR_FMT
" offset 0x%lx"
,
__func__
,
clientid
,
ophead
,
"%s:
op %d
invalid clientid %d op "
PTR_FMT
" offset 0x%lx"
,
__func__
,
i
,
clientid
,
ophead
,
(
unsigned
long
)
field_offset
);
}
/* check length */
p
=
&
ophead
->
oh_len
;
...
...
@@ -3792,8 +3675,7 @@ xlog_verify_iclog(
if
(
field_offset
&
0x1ff
)
{
op_len
=
be32_to_cpu
(
ophead
->
oh_len
);
}
else
{
idx
=
BTOBBT
((
uintptr_t
)
&
ophead
->
oh_len
-
(
uintptr_t
)
iclog
->
ic_datap
);
idx
=
BTOBBT
((
void
*
)
&
ophead
->
oh_len
-
iclog
->
ic_datap
);
if
(
idx
>=
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
))
{
j
=
idx
/
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
);
k
=
idx
%
(
XLOG_HEADER_CYCLE_SIZE
/
BBSIZE
);
...
...
fs/xfs/xfs_log.h
View file @
463260d7
...
...
@@ -21,42 +21,19 @@ struct xfs_log_vec {
#define XFS_LOG_VEC_ORDERED (-1)
/*
 * Claim the next iovec slot of a log vector, tag it with the region type and
 * point it at the current end of the vector's buffer (lv_buf + lv_buf_len).
 * The address is expected to be 64-bit aligned. *vecp is updated to the slot
 * that was claimed and the region address is returned for the caller to fill.
 */
static inline void *
xlog_prepare_iovec(
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp,
	uint			type)
{
	struct xfs_log_iovec	*iov = *vecp;

	/* No previous slot means this is the first region of the vector. */
	if (!iov) {
		iov = &lv->lv_iovecp[0];
	} else {
		ASSERT(iov - lv->lv_iovecp < lv->lv_niovecs);
		iov++;
	}

	iov->i_addr = lv->lv_buf + lv->lv_buf_len;
	iov->i_type = type;

	ASSERT(IS_ALIGNED((unsigned long)iov->i_addr, sizeof(uint64_t)));

	*vecp = iov;
	return iov->i_addr;
}
void
*
xlog_prepare_iovec
(
struct
xfs_log_vec
*
lv
,
struct
xfs_log_iovec
**
vecp
,
uint
type
);
/*
* We need to make sure the next buffer is naturally aligned for the biggest
* basic data type we put into it. We already accounted for this padding when
* sizing the buffer.
*
* However, this padding does not get written into the log, and hence we have to
* track the space used by the log vectors separately to prevent log space hangs
* due to inaccurate accounting (i.e. a leak) of the used log space through the
* CIL context ticket.
*/
static
inline
void
xlog_finish_iovec
(
struct
xfs_log_vec
*
lv
,
struct
xfs_log_iovec
*
vec
,
int
len
)
{
lv
->
lv_buf_len
+=
round_up
(
len
,
sizeof
(
uint64_t
));
struct
xlog_op_header
*
oph
=
vec
->
i_addr
;
/* opheader tracks payload length, logvec tracks region length */
oph
->
oh_len
=
cpu_to_be32
(
len
);
len
+=
sizeof
(
struct
xlog_op_header
);
lv
->
lv_buf_len
+=
len
;
lv
->
lv_bytes
+=
len
;
vec
->
i_len
=
len
;
}
...
...
@@ -118,12 +95,8 @@ void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t
xlog_assign_tail_lsn
(
struct
xfs_mount
*
mp
);
xfs_lsn_t
xlog_assign_tail_lsn_locked
(
struct
xfs_mount
*
mp
);
void
xfs_log_space_wake
(
struct
xfs_mount
*
mp
);
int
xfs_log_reserve
(
struct
xfs_mount
*
mp
,
int
length
,
int
count
,
struct
xlog_ticket
**
ticket
,
uint8_t
clientid
,
bool
permanent
);
int
xfs_log_reserve
(
struct
xfs_mount
*
mp
,
int
length
,
int
count
,
struct
xlog_ticket
**
ticket
,
bool
permanent
);
int
xfs_log_regrant
(
struct
xfs_mount
*
mp
,
struct
xlog_ticket
*
tic
);
void
xfs_log_unmount
(
struct
xfs_mount
*
mp
);
bool
xfs_log_writable
(
struct
xfs_mount
*
mp
);
...
...
fs/xfs/xfs_log_cil.c
View file @
463260d7
...
...
@@ -37,7 +37,7 @@ xlog_cil_ticket_alloc(
{
struct
xlog_ticket
*
tic
;
tic
=
xlog_ticket_alloc
(
log
,
0
,
1
,
XFS_TRANSACTION
,
0
);
tic
=
xlog_ticket_alloc
(
log
,
0
,
1
,
0
);
/*
* set the current reservation to zero so we know to steal the basic
...
...
@@ -214,13 +214,20 @@ xlog_cil_alloc_shadow_bufs(
}
/*
* We 64-bit align the length of each iovec so that the start
* of the next one is naturally aligned. We'll need to
* account for that slack space here. Then round nbytes up
* to 64-bit alignment so that the initial buffer alignment is
* easy to calculate and verify.
* We 64-bit align the length of each iovec so that the start of
* the next one is naturally aligned. We'll need to account for
* that slack space here.
*
* We also add the xlog_op_header to each region when
* formatting, but that's not accounted to the size of the item
* at this point. Hence we'll need an additional number of bytes
* for each vector to hold an opheader.
*
* Then round nbytes up to 64-bit alignment so that the initial
* buffer alignment is easy to calculate and verify.
*/
nbytes
+=
niovecs
*
sizeof
(
uint64_t
);
nbytes
+=
niovecs
*
(
sizeof
(
uint64_t
)
+
sizeof
(
struct
xlog_op_header
));
nbytes
=
round_up
(
nbytes
,
sizeof
(
uint64_t
));
/*
...
...
@@ -277,22 +284,18 @@ xlog_cil_alloc_shadow_bufs(
/*
* Prepare the log item for insertion into the CIL. Calculate the difference in
* log space and vectors it will consume, and if it is a new item pin it as
* well.
* log space it will consume, and if it is a new item pin it as well.
*/
STATIC
void
xfs_cil_prepare_item
(
struct
xlog
*
log
,
struct
xfs_log_vec
*
lv
,
struct
xfs_log_vec
*
old_lv
,
int
*
diff_len
,
int
*
diff_iovecs
)
int
*
diff_len
)
{
/* Account for the new LV being passed in */
if
(
lv
->
lv_buf_len
!=
XFS_LOG_VEC_ORDERED
)
{
if
(
lv
->
lv_buf_len
!=
XFS_LOG_VEC_ORDERED
)
*
diff_len
+=
lv
->
lv_bytes
;
*
diff_iovecs
+=
lv
->
lv_niovecs
;
}
/*
* If there is no old LV, this is the first time we've seen the item in
...
...
@@ -309,7 +312,6 @@ xfs_cil_prepare_item(
ASSERT
(
lv
->
lv_buf_len
!=
XFS_LOG_VEC_ORDERED
);
*
diff_len
-=
old_lv
->
lv_bytes
;
*
diff_iovecs
-=
old_lv
->
lv_niovecs
;
lv
->
lv_item
->
li_lv_shadow
=
old_lv
;
}
...
...
@@ -358,12 +360,10 @@ static void
xlog_cil_insert_format_items
(
struct
xlog
*
log
,
struct
xfs_trans
*
tp
,
int
*
diff_len
,
int
*
diff_iovecs
)
int
*
diff_len
)
{
struct
xfs_log_item
*
lip
;
/* Bail out if we didn't find a log item. */
if
(
list_empty
(
&
tp
->
t_items
))
{
ASSERT
(
0
);
...
...
@@ -406,7 +406,6 @@ xlog_cil_insert_format_items(
* set the item up as though it is a new insertion so
* that the space reservation accounting is correct.
*/
*
diff_iovecs
-=
lv
->
lv_niovecs
;
*
diff_len
-=
lv
->
lv_bytes
;
/* Ensure the lv is set up according to ->iop_size */
...
...
@@ -431,7 +430,7 @@ xlog_cil_insert_format_items(
ASSERT
(
IS_ALIGNED
((
unsigned
long
)
lv
->
lv_buf
,
sizeof
(
uint64_t
)));
lip
->
li_ops
->
iop_format
(
lip
,
lv
);
insert:
xfs_cil_prepare_item
(
log
,
lv
,
old_lv
,
diff_len
,
diff_iovecs
);
xfs_cil_prepare_item
(
log
,
lv
,
old_lv
,
diff_len
);
}
}
...
...
@@ -451,7 +450,6 @@ xlog_cil_insert_items(
struct
xfs_cil_ctx
*
ctx
=
cil
->
xc_ctx
;
struct
xfs_log_item
*
lip
;
int
len
=
0
;
int
diff_iovecs
=
0
;
int
iclog_space
;
int
iovhdr_res
=
0
,
split_res
=
0
,
ctx_res
=
0
;
...
...
@@ -461,15 +459,10 @@ xlog_cil_insert_items(
* We can do this safely because the context can't checkpoint until we
* are done so it doesn't matter exactly how we update the CIL.
*/
xlog_cil_insert_format_items
(
log
,
tp
,
&
len
,
&
diff_iovecs
);
xlog_cil_insert_format_items
(
log
,
tp
,
&
len
);
spin_lock
(
&
cil
->
xc_cil_lock
);
/* account for space used by new iovec headers */
iovhdr_res
=
diff_iovecs
*
sizeof
(
xlog_op_header_t
);
len
+=
iovhdr_res
;
ctx
->
nvecs
+=
diff_iovecs
;
/* attach the transaction to the CIL if it has any busy extents */
if
(
!
list_empty
(
&
tp
->
t_busy
))
list_splice_init
(
&
tp
->
t_busy
,
&
ctx
->
busy_extents
);
...
...
@@ -822,7 +815,8 @@ xlog_cil_order_write(
static
int
xlog_cil_write_chain
(
struct
xfs_cil_ctx
*
ctx
,
struct
xfs_log_vec
*
chain
)
struct
xfs_log_vec
*
chain
,
uint32_t
chain_len
)
{
struct
xlog
*
log
=
ctx
->
cil
->
xc_log
;
int
error
;
...
...
@@ -830,7 +824,7 @@ xlog_cil_write_chain(
error
=
xlog_cil_order_write
(
ctx
->
cil
,
ctx
->
sequence
,
_START_RECORD
);
if
(
error
)
return
error
;
return
xlog_write
(
log
,
ctx
,
chain
,
ctx
->
ticket
,
XLOG_START_TRANS
);
return
xlog_write
(
log
,
ctx
,
chain
,
ctx
->
ticket
,
chain_len
);
}
/*
...
...
@@ -844,9 +838,14 @@ xlog_cil_write_commit_record(
struct
xfs_cil_ctx
*
ctx
)
{
struct
xlog
*
log
=
ctx
->
cil
->
xc_log
;
struct
xlog_op_header
ophdr
=
{
.
oh_clientid
=
XFS_TRANSACTION
,
.
oh_tid
=
cpu_to_be32
(
ctx
->
ticket
->
t_tid
),
.
oh_flags
=
XLOG_COMMIT_TRANS
,
};
struct
xfs_log_iovec
reg
=
{
.
i_addr
=
NULL
,
.
i_len
=
0
,
.
i_addr
=
&
ophdr
,
.
i_len
=
sizeof
(
struct
xlog_op_header
)
,
.
i_type
=
XLOG_REG_TYPE_COMMIT
,
};
struct
xfs_log_vec
vec
=
{
...
...
@@ -862,12 +861,79 @@ xlog_cil_write_commit_record(
if
(
error
)
return
error
;
error
=
xlog_write
(
log
,
ctx
,
&
vec
,
ctx
->
ticket
,
XLOG_COMMIT_TRANS
);
/* account for space used by record data */
ctx
->
ticket
->
t_curr_res
-=
reg
.
i_len
;
error
=
xlog_write
(
log
,
ctx
,
&
vec
,
ctx
->
ticket
,
reg
.
i_len
);
if
(
error
)
xlog_force_shutdown
(
log
,
SHUTDOWN_LOG_IO_ERROR
);
return
error
;
}
/*
 * Backing storage for the two regions that open a checkpoint, as filled in by
 * xlog_cil_build_trans_hdr():
 *
 *   oph[0]         - opheader flagged XLOG_START_TRANS (region 0)
 *   oph[1] + thdr  - opheader followed by the transaction header (region 1)
 *   lhdr[0..1]     - the log iovecs describing those two regions
 *
 * Region 1 is described as a single span that begins at oph[1] and is
 * sizeof(opheader) + sizeof(transaction header) long, so thdr must remain the
 * member directly after oph[].
 */
struct xlog_cil_trans_hdr {
	struct xlog_op_header	oph[2];
	struct xfs_trans_header	thdr;
	struct xfs_log_iovec	lhdr[2];
};
/*
* Build a checkpoint transaction header to begin the journal transaction. We
* need to account for the space used by the transaction header here as it is
* not accounted for in xlog_write().
*
* This is the only place we write a transaction header, so we also build the
* log opheaders that indicate the start of a log transaction and wrap the
* transaction header. We keep the start record in it's own log vector rather
* than compacting them into a single region as this ends up making the logic
* in xlog_write() for handling empty opheaders for start, commit and unmount
* records much simpler.
*/
static
void
xlog_cil_build_trans_hdr
(
struct
xfs_cil_ctx
*
ctx
,
struct
xlog_cil_trans_hdr
*
hdr
,
struct
xfs_log_vec
*
lvhdr
,
int
num_iovecs
)
{
struct
xlog_ticket
*
tic
=
ctx
->
ticket
;
__be32
tid
=
cpu_to_be32
(
tic
->
t_tid
);
memset
(
hdr
,
0
,
sizeof
(
*
hdr
));
/* Log start record */
hdr
->
oph
[
0
].
oh_tid
=
tid
;
hdr
->
oph
[
0
].
oh_clientid
=
XFS_TRANSACTION
;
hdr
->
oph
[
0
].
oh_flags
=
XLOG_START_TRANS
;
/* log iovec region pointer */
hdr
->
lhdr
[
0
].
i_addr
=
&
hdr
->
oph
[
0
];
hdr
->
lhdr
[
0
].
i_len
=
sizeof
(
struct
xlog_op_header
);
hdr
->
lhdr
[
0
].
i_type
=
XLOG_REG_TYPE_LRHEADER
;
/* log opheader */
hdr
->
oph
[
1
].
oh_tid
=
tid
;
hdr
->
oph
[
1
].
oh_clientid
=
XFS_TRANSACTION
;
hdr
->
oph
[
1
].
oh_len
=
cpu_to_be32
(
sizeof
(
struct
xfs_trans_header
));
/* transaction header in host byte order format */
hdr
->
thdr
.
th_magic
=
XFS_TRANS_HEADER_MAGIC
;
hdr
->
thdr
.
th_type
=
XFS_TRANS_CHECKPOINT
;
hdr
->
thdr
.
th_tid
=
tic
->
t_tid
;
hdr
->
thdr
.
th_num_items
=
num_iovecs
;
/* log iovec region pointer */
hdr
->
lhdr
[
1
].
i_addr
=
&
hdr
->
oph
[
1
];
hdr
->
lhdr
[
1
].
i_len
=
sizeof
(
struct
xlog_op_header
)
+
sizeof
(
struct
xfs_trans_header
);
hdr
->
lhdr
[
1
].
i_type
=
XLOG_REG_TYPE_TRANSHDR
;
lvhdr
->
lv_niovecs
=
2
;
lvhdr
->
lv_iovecp
=
&
hdr
->
lhdr
[
0
];
lvhdr
->
lv_bytes
=
hdr
->
lhdr
[
0
].
i_len
+
hdr
->
lhdr
[
1
].
i_len
;
lvhdr
->
lv_next
=
ctx
->
lv_chain
;
tic
->
t_curr_res
-=
lvhdr
->
lv_bytes
;
}
/*
* Push the Committed Item List to the log.
*
...
...
@@ -892,11 +958,10 @@ xlog_cil_push_work(
struct
xlog
*
log
=
cil
->
xc_log
;
struct
xfs_log_vec
*
lv
;
struct
xfs_cil_ctx
*
new_ctx
;
struct
xlog_ticket
*
tic
;
int
num_
iovecs
;
int
num_iovecs
=
0
;
int
num_
bytes
=
0
;
int
error
=
0
;
struct
xfs_trans_header
thdr
;
struct
xfs_log_iovec
lhdr
;
struct
xlog_cil_trans_hdr
thdr
;
struct
xfs_log_vec
lvhdr
=
{
NULL
};
xfs_csn_t
push_seq
;
bool
push_commit_stable
;
...
...
@@ -975,7 +1040,6 @@ xlog_cil_push_work(
* by the flush lock.
*/
lv
=
NULL
;
num_iovecs
=
0
;
while
(
!
list_empty
(
&
cil
->
xc_cil
))
{
struct
xfs_log_item
*
item
;
...
...
@@ -989,6 +1053,10 @@ xlog_cil_push_work(
lv
=
item
->
li_lv
;
item
->
li_lv
=
NULL
;
num_iovecs
+=
lv
->
lv_niovecs
;
/* we don't write ordered log vectors */
if
(
lv
->
lv_buf_len
!=
XFS_LOG_VEC_ORDERED
)
num_bytes
+=
lv
->
lv_bytes
;
}
/*
...
...
@@ -1025,26 +1093,11 @@ xlog_cil_push_work(
* Build a checkpoint transaction header and write it to the log to
* begin the transaction. We need to account for the space used by the
* transaction header here as it is not accounted for in xlog_write().
*
* The LSN we need to pass to the log items on transaction commit is
* the LSN reported by the first log vector write. If we use the commit
* record lsn then we can move the tail beyond the grant write head.
*/
tic
=
ctx
->
ticket
;
thdr
.
th_magic
=
XFS_TRANS_HEADER_MAGIC
;
thdr
.
th_type
=
XFS_TRANS_CHECKPOINT
;
thdr
.
th_tid
=
tic
->
t_tid
;
thdr
.
th_num_items
=
num_iovecs
;
lhdr
.
i_addr
=
&
thdr
;
lhdr
.
i_len
=
sizeof
(
xfs_trans_header_t
);
lhdr
.
i_type
=
XLOG_REG_TYPE_TRANSHDR
;
tic
->
t_curr_res
-=
lhdr
.
i_len
+
sizeof
(
xlog_op_header_t
);
lvhdr
.
lv_niovecs
=
1
;
lvhdr
.
lv_iovecp
=
&
lhdr
;
lvhdr
.
lv_next
=
ctx
->
lv_chain
;
error
=
xlog_cil_write_chain
(
ctx
,
&
lvhdr
);
*/
xlog_cil_build_trans_hdr
(
ctx
,
&
thdr
,
&
lvhdr
,
num_iovecs
);
num_bytes
+=
lvhdr
.
lv_bytes
;
error
=
xlog_cil_write_chain
(
ctx
,
&
lvhdr
,
num_bytes
);
if
(
error
)
goto
out_abort_free_ticket
;
...
...
@@ -1052,7 +1105,7 @@ xlog_cil_push_work(
if
(
error
)
goto
out_abort_free_ticket
;
xfs_log_ticket_ungrant
(
log
,
tic
);
xfs_log_ticket_ungrant
(
log
,
ctx
->
ticket
);
/*
* If the checkpoint spans multiple iclogs, wait for all previous iclogs
...
...
@@ -1116,7 +1169,7 @@ xlog_cil_push_work(
return
;
out_abort_free_ticket:
xfs_log_ticket_ungrant
(
log
,
tic
);
xfs_log_ticket_ungrant
(
log
,
ctx
->
ticket
);
ASSERT
(
xlog_is_shutdown
(
log
));
if
(
!
ctx
->
commit_iclog
)
{
xlog_cil_committed
(
ctx
);
...
...
fs/xfs/xfs_log_priv.h
View file @
463260d7
...
...
@@ -142,19 +142,6 @@ enum xlog_iclog_state {
#define XLOG_COVER_OPS 5
/* Ticket reservation region accounting */
#define XLOG_TIC_LEN_MAX 15
/*
 * Reservation region.
 *
 * Holds the same length/type pair that an xfs_log_iovec stores, minus the
 * i_addr member, which we don't care about.
 */
typedef struct xlog_res {
	uint	r_len;	/* region length		:4 */
	uint	r_type;	/* region's transaction type	:4 */
} xlog_res_t;
typedef
struct
xlog_ticket
{
struct
list_head
t_queue
;
/* reserve/write queue */
struct
task_struct
*
t_task
;
/* task that owns this ticket */
...
...
@@ -164,15 +151,7 @@ typedef struct xlog_ticket {
int
t_unit_res
;
/* unit reservation in bytes : 4 */
char
t_ocnt
;
/* original count : 1 */
char
t_cnt
;
/* current count : 1 */
char
t_clientid
;
/* who does this belong to; : 1 */
uint8_t
t_flags
;
/* properties of reservation : 1 */
/* reservation array fields */
uint
t_res_num
;
/* num in array : 4 */
uint
t_res_num_ophdrs
;
/* num op hdrs : 4 */
uint
t_res_arr_sum
;
/* array sum : 4 */
uint
t_res_o_flow
;
/* sum overflow : 4 */
xlog_res_t
t_res_arr
[
XLOG_TIC_LEN_MAX
];
/* array of res : 8 * 15 */
}
xlog_ticket_t
;
/*
...
...
@@ -211,7 +190,7 @@ typedef struct xlog_in_core {
u32
ic_offset
;
enum
xlog_iclog_state
ic_state
;
unsigned
int
ic_flags
;
char
*
ic_datap
;
/* pointer to iclog data */
void
*
ic_datap
;
/* pointer to iclog data */
struct
list_head
ic_callbacks
;
/* reference counts need their own cacheline */
...
...
@@ -242,7 +221,6 @@ struct xfs_cil_ctx {
xfs_lsn_t
commit_lsn
;
/* chkpt commit record lsn */
struct
xlog_in_core
*
commit_iclog
;
struct
xlog_ticket
*
ticket
;
/* chkpt ticket */
int
nvecs
;
/* number of regions */
int
space_used
;
/* aggregate size of regions */
struct
list_head
busy_extents
;
/* busy extents in chkpt */
struct
xfs_log_vec
*
lv_chain
;
/* logvecs being pushed */
...
...
@@ -441,10 +419,6 @@ struct xlog {
struct
xfs_kobj
l_kobj
;
/* The following fields are used for debugging; need to hold icloglock */
#ifdef DEBUG
void
*
l_iclog_bak
[
XLOG_MAX_ICLOGS
];
#endif
/* log recovery lsn tracking (for buffer submission) */
xfs_lsn_t
l_recovery_lsn
;
...
...
@@ -509,27 +483,14 @@ extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
char
*
dp
,
int
size
);
extern
struct
kmem_cache
*
xfs_log_ticket_cache
;
struct
xlog_ticket
*
xlog_ticket_alloc
(
struct
xlog
*
log
,
int
unit_bytes
,
int
count
,
char
client
,
bool
permanent
);
/*
 * Account for @bytes having been consumed from a buffer: step the cursor
 * *@ptr forward by @bytes, shrink the remaining length *@len by @bytes and
 * grow the running offset *@off by @bytes.
 */
static inline void
xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
{
	char	*cursor = *ptr;

	*off += bytes;
	*len -= bytes;

	/* the void pointer is stepped in units of one byte */
	*ptr = cursor + bytes;
}
struct
xlog_ticket
*
xlog_ticket_alloc
(
struct
xlog
*
log
,
int
unit_bytes
,
int
count
,
bool
permanent
);
void
xlog_print_tic_res
(
struct
xfs_mount
*
mp
,
struct
xlog_ticket
*
ticket
);
void
xlog_print_trans
(
struct
xfs_trans
*
);
int
xlog_write
(
struct
xlog
*
log
,
struct
xfs_cil_ctx
*
ctx
,
struct
xfs_log_vec
*
log_vector
,
struct
xlog_ticket
*
tic
,
uint
optype
);
uint
32_t
len
);
void
xfs_log_ticket_ungrant
(
struct
xlog
*
log
,
struct
xlog_ticket
*
ticket
);
void
xfs_log_ticket_regrant
(
struct
xlog
*
log
,
struct
xlog_ticket
*
ticket
);
...
...
fs/xfs/xfs_trans.c
View file @
463260d7
...
...
@@ -194,11 +194,9 @@ xfs_trans_reserve(
ASSERT
(
resp
->
tr_logflags
&
XFS_TRANS_PERM_LOG_RES
);
error
=
xfs_log_regrant
(
mp
,
tp
->
t_ticket
);
}
else
{
error
=
xfs_log_reserve
(
mp
,
resp
->
tr_logres
,
error
=
xfs_log_reserve
(
mp
,
resp
->
tr_logres
,
resp
->
tr_logcount
,
&
tp
->
t_ticket
,
XFS_TRANSACTION
,
permanent
);
&
tp
->
t_ticket
,
permanent
);
}
if
(
error
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment