Commit cddfa11a authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - more ocfs2 work

 - various leftovers

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  memory_hotplug: cond_resched in __remove_pages
  bfs: add sanity check at bfs_fill_super()
  kernel/sysctl.c: remove duplicated include
  kernel/kexec_file.c: remove some duplicated includes
  mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask
  ocfs2: fix clusters leak in ocfs2_defrag_extent()
  ocfs2: dlmglue: clean up timestamp handling
  ocfs2: don't put and assigning null to bh allocated outside
  ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry
  ocfs2: don't use iocb when EIOCBQUEUED returns
  ocfs2: without quota support, avoid calling quota recovery
  ocfs2: remove ocfs2_is_o2cb_active()
  mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings
  include/linux/notifier.h: SRCU: fix ctags
  mm: handle no memcg case in memcg_kmem_charge() properly
parents 5f215853 dd33ad7b
...@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) ...@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = BFS_MAGIC; s->s_magic = BFS_MAGIC;
if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
printf("Superblock is corrupted\n"); printf("Superblock is corrupted\n");
goto out1; goto out1;
} }
...@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) ...@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
sizeof(struct bfs_inode) sizeof(struct bfs_inode)
+ BFS_ROOT_INO - 1; + BFS_ROOT_INO - 1;
imap_len = (info->si_lasti / 8) + 1; imap_len = (info->si_lasti / 8) + 1;
info->si_imap = kzalloc(imap_len, GFP_KERNEL); info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
if (!info->si_imap) if (!info->si_imap) {
printf("Cannot allocate %u bytes\n", imap_len);
goto out1; goto out1;
}
for (i = 0; i < BFS_ROOT_INO; i++) for (i = 0; i < BFS_ROOT_INO; i++)
set_bit(i, info->si_imap); set_bit(i, info->si_imap);
......
...@@ -99,25 +99,34 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, ...@@ -99,25 +99,34 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
return ret; return ret;
} }
/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
* will be easier to handle read failure.
*/
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]) unsigned int nr, struct buffer_head *bhs[])
{ {
int status = 0; int status = 0;
unsigned int i; unsigned int i;
struct buffer_head *bh; struct buffer_head *bh;
int new_bh = 0;
trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
if (!nr) if (!nr)
goto bail; goto bail;
/* Don't put buffer head and re-assign it to NULL if it is allocated
* outside since the caller can't be aware of this alternation!
*/
new_bh = (bhs[0] == NULL);
for (i = 0 ; i < nr ; i++) { for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) { if (bhs[i] == NULL) {
bhs[i] = sb_getblk(osb->sb, block++); bhs[i] = sb_getblk(osb->sb, block++);
if (bhs[i] == NULL) { if (bhs[i] == NULL) {
status = -ENOMEM; status = -ENOMEM;
mlog_errno(status); mlog_errno(status);
goto bail; break;
} }
} }
bh = bhs[i]; bh = bhs[i];
...@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, ...@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
submit_bh(REQ_OP_READ, 0, bh); submit_bh(REQ_OP_READ, 0, bh);
} }
read_failure:
for (i = nr; i > 0; i--) { for (i = nr; i > 0; i--) {
bh = bhs[i - 1]; bh = bhs[i - 1];
if (unlikely(status)) {
if (new_bh && bh) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
* aovoid bh leak
*/
if (!buffer_jbd(bh))
wait_on_buffer(bh);
put_bh(bh);
bhs[i - 1] = NULL;
} else if (bh && buffer_uptodate(bh)) {
clear_buffer_uptodate(bh);
}
continue;
}
/* No need to wait on the buffer if it's managed by JBD. */ /* No need to wait on the buffer if it's managed by JBD. */
if (!buffer_jbd(bh)) if (!buffer_jbd(bh))
wait_on_buffer(bh); wait_on_buffer(bh);
...@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, ...@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
* so we can safely record this and loop back * so we can safely record this and loop back
* to cleanup the other buffers. */ * to cleanup the other buffers. */
status = -EIO; status = -EIO;
put_bh(bh); goto read_failure;
bhs[i - 1] = NULL;
} }
} }
...@@ -179,6 +204,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, ...@@ -179,6 +204,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
return status; return status;
} }
/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
* will be easier to handle read failure.
*/
int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
struct buffer_head *bhs[], int flags, struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb, int (*validate)(struct super_block *sb,
...@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
int i, ignore_cache = 0; int i, ignore_cache = 0;
struct buffer_head *bh; struct buffer_head *bh;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
int new_bh = 0;
trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
...@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
goto bail; goto bail;
} }
/* Don't put buffer head and re-assign it to NULL if it is allocated
* outside since the caller can't be aware of this alternation!
*/
new_bh = (bhs[0] == NULL);
ocfs2_metadata_cache_io_lock(ci); ocfs2_metadata_cache_io_lock(ci);
for (i = 0 ; i < nr ; i++) { for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) { if (bhs[i] == NULL) {
...@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
ocfs2_metadata_cache_io_unlock(ci); ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM; status = -ENOMEM;
mlog_errno(status); mlog_errno(status);
goto bail; /* Don't forget to put previous bh! */
break;
} }
} }
bh = bhs[i]; bh = bhs[i];
...@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
} }
} }
status = 0; read_failure:
for (i = (nr - 1); i >= 0; i--) { for (i = (nr - 1); i >= 0; i--) {
bh = bhs[i]; bh = bhs[i];
if (!(flags & OCFS2_BH_READAHEAD)) { if (!(flags & OCFS2_BH_READAHEAD)) {
if (status) { if (unlikely(status)) {
/* Clear the rest of the buffers on error */ /* Clear the buffers on error including those
put_bh(bh); * ever succeeded in reading
bhs[i] = NULL; */
if (new_bh && bh) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
* aovoid bh leak
*/
if (!buffer_jbd(bh))
wait_on_buffer(bh);
put_bh(bh);
bhs[i] = NULL;
} else if (bh && buffer_uptodate(bh)) {
clear_buffer_uptodate(bh);
}
continue; continue;
} }
/* We know this can't have changed as we hold the /* We know this can't have changed as we hold the
...@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
* uptodate. */ * uptodate. */
status = -EIO; status = -EIO;
clear_buffer_needs_validate(bh); clear_buffer_needs_validate(bh);
put_bh(bh); goto read_failure;
bhs[i] = NULL;
continue;
} }
if (buffer_needs_validate(bh)) { if (buffer_needs_validate(bh)) {
...@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ...@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
BUG_ON(buffer_jbd(bh)); BUG_ON(buffer_jbd(bh));
clear_buffer_needs_validate(bh); clear_buffer_needs_validate(bh);
status = validate(sb, bh); status = validate(sb, bh);
if (status) { if (status)
put_bh(bh); goto read_failure;
bhs[i] = NULL;
continue;
}
} }
} }
......
...@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, ...@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
/* On error, skip the f_pos to the /* On error, skip the f_pos to the
next block. */ next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
brelse(bh); break;
continue;
} }
if (le64_to_cpu(de->inode)) { if (le64_to_cpu(de->inode)) {
unsigned char d_type = DT_UNKNOWN; unsigned char d_type = DT_UNKNOWN;
......
...@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, ...@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
/* LVB only has room for 64 bits of time here so we pack it for /* LVB only has room for 64 bits of time here so we pack it for
* now. */ * now. */
static u64 ocfs2_pack_timespec(struct timespec *spec) static u64 ocfs2_pack_timespec(struct timespec64 *spec)
{ {
u64 res; u64 res;
u64 sec = spec->tv_sec; u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
u32 nsec = spec->tv_nsec; u32 nsec = spec->tv_nsec;
res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
...@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) ...@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb; struct ocfs2_meta_lvb *lvb;
struct timespec ts;
lvb = ocfs2_dlm_lvb(&lockres->l_lksb); lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
...@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) ...@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_imode = cpu_to_be16(inode->i_mode);
lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
ts = timespec64_to_timespec(inode->i_atime);
lvb->lvb_iatime_packed = lvb->lvb_iatime_packed =
cpu_to_be64(ocfs2_pack_timespec(&ts)); cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
ts = timespec64_to_timespec(inode->i_ctime);
lvb->lvb_ictime_packed = lvb->lvb_ictime_packed =
cpu_to_be64(ocfs2_pack_timespec(&ts)); cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
ts = timespec64_to_timespec(inode->i_mtime);
lvb->lvb_imtime_packed = lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&ts)); cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
...@@ -2180,7 +2176,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) ...@@ -2180,7 +2176,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
mlog_meta_lvb(0, lockres); mlog_meta_lvb(0, lockres);
} }
static void ocfs2_unpack_timespec(struct timespec *spec, static void ocfs2_unpack_timespec(struct timespec64 *spec,
u64 packed_time) u64 packed_time)
{ {
spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
...@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec, ...@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
static void ocfs2_refresh_inode_from_lvb(struct inode *inode) static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{ {
struct timespec ts;
struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb; struct ocfs2_meta_lvb *lvb;
...@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) ...@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
inode->i_mode = be16_to_cpu(lvb->lvb_imode); inode->i_mode = be16_to_cpu(lvb->lvb_imode);
set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
ocfs2_unpack_timespec(&ts, ocfs2_unpack_timespec(&inode->i_atime,
be64_to_cpu(lvb->lvb_iatime_packed)); be64_to_cpu(lvb->lvb_iatime_packed));
inode->i_atime = timespec_to_timespec64(ts); ocfs2_unpack_timespec(&inode->i_mtime,
ocfs2_unpack_timespec(&ts,
be64_to_cpu(lvb->lvb_imtime_packed)); be64_to_cpu(lvb->lvb_imtime_packed));
inode->i_mtime = timespec_to_timespec64(ts); ocfs2_unpack_timespec(&inode->i_ctime,
ocfs2_unpack_timespec(&ts,
be64_to_cpu(lvb->lvb_ictime_packed)); be64_to_cpu(lvb->lvb_ictime_packed));
inode->i_ctime = timespec_to_timespec64(ts);
spin_unlock(&oi->ip_lock); spin_unlock(&oi->ip_lock);
} }
...@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, ...@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
* we can recover correctly from node failure. Otherwise, we may get * we can recover correctly from node failure. Otherwise, we may get
* invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
*/ */
if (!ocfs2_is_o2cb_active() && if (ocfs2_userspace_stack(osb) &&
lockres->l_ops->flags & LOCK_TYPE_USES_LVB) lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
lvb = 1; lvb = 1;
......
...@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
written = __generic_file_write_iter(iocb, from); written = __generic_file_write_iter(iocb, from);
/* buffered aio wouldn't have proper lock coverage today */ /* buffered aio wouldn't have proper lock coverage today */
BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); BUG_ON(written == -EIOCBQUEUED && !direct_io);
/* /*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
...@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, ...@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
trace_generic_file_read_iter_ret(ret); trace_generic_file_read_iter_ret(ret);
/* buffered aio wouldn't have proper lock coverage today */ /* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); BUG_ON(ret == -EIOCBQUEUED && !direct_io);
/* see ocfs2_file_write_iter */ /* see ocfs2_file_write_iter */
if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
......
...@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
int rm_quota_used = 0, i; int rm_quota_used = 0, i;
struct ocfs2_quota_recovery *qrec; struct ocfs2_quota_recovery *qrec;
/* Whether the quota supported. */
int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
|| OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
status = ocfs2_wait_on_mount(osb); status = ocfs2_wait_on_mount(osb);
if (status < 0) { if (status < 0) {
goto bail; goto bail;
} }
rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); if (quota_enabled) {
if (!rm_quota) { rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
status = -ENOMEM; if (!rm_quota) {
goto bail; status = -ENOMEM;
goto bail;
}
} }
restart: restart:
status = ocfs2_super_lock(osb, 1); status = ocfs2_super_lock(osb, 1);
...@@ -1422,9 +1430,14 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1422,9 +1430,14 @@ static int __ocfs2_recovery_thread(void *arg)
* then quota usage would be out of sync until some node takes * then quota usage would be out of sync until some node takes
* the slot. So we remember which nodes need quota recovery * the slot. So we remember which nodes need quota recovery
* and when everything else is done, we recover quotas. */ * and when everything else is done, we recover quotas. */
for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); if (quota_enabled) {
if (i == rm_quota_used) for (i = 0; i < rm_quota_used
rm_quota[rm_quota_used++] = slot_num; && rm_quota[i] != slot_num; i++)
;
if (i == rm_quota_used)
rm_quota[rm_quota_used++] = slot_num;
}
status = ocfs2_recover_node(osb, node_num, slot_num); status = ocfs2_recover_node(osb, node_num, slot_num);
skip_recovery: skip_recovery:
...@@ -1452,16 +1465,19 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1452,16 +1465,19 @@ static int __ocfs2_recovery_thread(void *arg)
/* Now it is right time to recover quotas... We have to do this under /* Now it is right time to recover quotas... We have to do this under
* superblock lock so that no one can start using the slot (and crash) * superblock lock so that no one can start using the slot (and crash)
* before we recover it */ * before we recover it */
for (i = 0; i < rm_quota_used; i++) { if (quota_enabled) {
qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); for (i = 0; i < rm_quota_used; i++) {
if (IS_ERR(qrec)) { qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
status = PTR_ERR(qrec); if (IS_ERR(qrec)) {
mlog_errno(status); status = PTR_ERR(qrec);
continue; mlog_errno(status);
continue;
}
ocfs2_queue_recovery_completion(osb->journal,
rm_quota[i],
NULL, NULL, qrec,
ORPHAN_NEED_TRUNCATE);
} }
ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
NULL, NULL, qrec,
ORPHAN_NEED_TRUNCATE);
} }
ocfs2_super_unlock(osb, 1); ocfs2_super_unlock(osb, 1);
...@@ -1483,7 +1499,8 @@ static int __ocfs2_recovery_thread(void *arg) ...@@ -1483,7 +1499,8 @@ static int __ocfs2_recovery_thread(void *arg)
mutex_unlock(&osb->recovery_lock); mutex_unlock(&osb->recovery_lock);
kfree(rm_quota); if (quota_enabled)
kfree(rm_quota);
/* no one is callint kthread_stop() for us so the kthread() api /* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but * requires that we call do_exit(). And it isn't exported, but
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ocfs2_ioctl.h" #include "ocfs2_ioctl.h"
#include "alloc.h" #include "alloc.h"
#include "localalloc.h"
#include "aops.h" #include "aops.h"
#include "dlmglue.h" #include "dlmglue.h"
#include "extent_map.h" #include "extent_map.h"
...@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
struct ocfs2_refcount_tree *ref_tree = NULL; struct ocfs2_refcount_tree *ref_tree = NULL;
u32 new_phys_cpos, new_len; u32 new_phys_cpos, new_len;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
int need_free = 0;
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
BUG_ON(!ocfs2_is_refcount_inode(inode)); BUG_ON(!ocfs2_is_refcount_inode(inode));
...@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
if (!partial) { if (!partial) {
context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
ret = -ENOSPC; ret = -ENOSPC;
need_free = 1;
goto out_commit; goto out_commit;
} }
} }
...@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
mlog_errno(ret); mlog_errno(ret);
out_commit: out_commit:
if (need_free && context->data_ac) {
struct ocfs2_alloc_context *data_ac = context->data_ac;
if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
ocfs2_free_local_alloc_bits(osb, handle, data_ac,
new_phys_cpos, new_len);
else
ocfs2_free_clusters(handle,
data_ac->ac_inode,
data_ac->ac_bh,
ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
new_len);
}
ocfs2_commit_trans(osb, handle); ocfs2_commit_trans(osb, handle);
out_unlock_mutex: out_unlock_mutex:
......
...@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; ...@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
*/ */
static struct ocfs2_stack_plugin *active_stack; static struct ocfs2_stack_plugin *active_stack;
inline int ocfs2_is_o2cb_active(void)
{
return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
}
EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{ {
struct ocfs2_stack_plugin *p; struct ocfs2_stack_plugin *p;
......
...@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p ...@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
int ocfs2_is_o2cb_active(void);
extern struct kset *ocfs2_kset; extern struct kset *ocfs2_kset;
#endif /* STACKGLUE_H */ #endif /* STACKGLUE_H */
...@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) ...@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
} }
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr, struct vm_area_struct *vma, unsigned long addr,
int node, bool hugepage); int node);
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
#else #else
#define alloc_pages(gfp_mask, order) \ #define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order) alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ #define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
alloc_pages(gfp_mask, order) alloc_pages(gfp_mask, order)
#endif #endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr) \ #define alloc_page_vma(gfp_mask, vma, addr) \
alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \ #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) alloc_pages_vma(gfp_mask, 0, vma, addr, node)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask); extern unsigned long get_zeroed_page(gfp_t gfp_mask);
......
...@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, ...@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *get_task_policy(struct task_struct *p);
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
unsigned long addr); unsigned long addr);
struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
unsigned long addr);
bool vma_policy_mof(struct vm_area_struct *vma); bool vma_policy_mof(struct vm_area_struct *vma);
extern void numa_default_policy(void); extern void numa_default_policy(void);
......
...@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); ...@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#ifdef CONFIG_TREE_SRCU #ifdef CONFIG_TREE_SRCU
#define _SRCU_NOTIFIER_HEAD(name, mod) \ #define _SRCU_NOTIFIER_HEAD(name, mod) \
static DEFINE_PER_CPU(struct srcu_data, \ static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
name##_head_srcu_data); \
mod struct srcu_notifier_head name = \ mod struct srcu_notifier_head name = \
SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
......
...@@ -25,8 +25,6 @@ ...@@ -25,8 +25,6 @@
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/elfcore.h> #include <linux/elfcore.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/slab.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include "kexec_internal.h" #include "kexec_internal.h"
......
...@@ -66,7 +66,6 @@ ...@@ -66,7 +66,6 @@
#include <linux/kexec.h> #include <linux/kexec.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/processor.h> #include <asm/processor.h>
......
...@@ -629,21 +629,40 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, ...@@ -629,21 +629,40 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
* available * available
* never: never stall for any thp allocation * never: never stall for any thp allocation
*/ */
static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
{ {
const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
gfp_t this_node = 0;
#ifdef CONFIG_NUMA
struct mempolicy *pol;
/*
* __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
* specified, to express a general desire to stay on the current
* node for optimistic allocation attempts. If the defrag mode
* and/or madvise hint requires the direct reclaim then we prefer
* to fallback to other node rather than node reclaim because that
* can lead to excessive reclaim even though there is free memory
* on other nodes. We expect that NUMA preferences are specified
* by memory policies.
*/
pol = get_vma_policy(vma, addr);
if (pol->mode != MPOL_BIND)
this_node = __GFP_THISNODE;
mpol_cond_put(pol);
#endif
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
__GFP_KSWAPD_RECLAIM); __GFP_KSWAPD_RECLAIM | this_node);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
0); this_node);
return GFP_TRANSHUGE_LIGHT; return GFP_TRANSHUGE_LIGHT | this_node;
} }
/* Caller must hold page table lock. */ /* Caller must hold page table lock. */
...@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) ...@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
pte_free(vma->vm_mm, pgtable); pte_free(vma->vm_mm, pgtable);
return ret; return ret;
} }
gfp = alloc_hugepage_direct_gfpmask(vma); gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
if (unlikely(!page)) { if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
...@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) ...@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
alloc: alloc:
if (transparent_hugepage_enabled(vma) && if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) { !transparent_hugepage_debug_cow()) {
huge_gfp = alloc_hugepage_direct_gfpmask(vma); huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
haddr, numa_node_id());
} else } else
new_page = NULL; new_page = NULL;
......
...@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) ...@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
int ret = 0; int ret = 0;
if (memcg_kmem_bypass()) if (mem_cgroup_disabled() || memcg_kmem_bypass())
return 0; return 0;
memcg = get_mem_cgroup_from_current(); memcg = get_mem_cgroup_from_current();
......
...@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, ...@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
for (i = 0; i < sections_to_remove; i++) { for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
cond_resched();
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
altmap); altmap);
map_offset = 0; map_offset = 0;
......
...@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start) ...@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
} else if (PageTransHuge(page)) { } else if (PageTransHuge(page)) {
struct page *thp; struct page *thp;
thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
HPAGE_PMD_ORDER); address, numa_node_id());
if (!thp) if (!thp)
return NULL; return NULL;
prep_transhuge_page(thp); prep_transhuge_page(thp);
...@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, ...@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
* freeing by another task. It is the caller's responsibility to free the * freeing by another task. It is the caller's responsibility to free the
* extra reference for shared policies. * extra reference for shared policies.
*/ */
static struct mempolicy *get_vma_policy(struct vm_area_struct *vma, struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
{ {
struct mempolicy *pol = __get_vma_policy(vma, addr); struct mempolicy *pol = __get_vma_policy(vma, addr);
...@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, ...@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
* @vma: Pointer to VMA or NULL if not available. * @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual Address of the allocation. Must be inside the VMA. * @addr: Virtual Address of the allocation. Must be inside the VMA.
* @node: Which node to prefer for allocation (modulo policy). * @node: Which node to prefer for allocation (modulo policy).
* @hugepage: for hugepages try only the preferred node if possible
* *
* This function allocates a page from the kernel page pool and applies * This function allocates a page from the kernel page pool and applies
* a NUMA policy associated with the VMA or the current process. * a NUMA policy associated with the VMA or the current process.
...@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, ...@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*/ */
struct page * struct page *
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, int node, bool hugepage) unsigned long addr, int node)
{ {
struct mempolicy *pol; struct mempolicy *pol;
struct page *page; struct page *page;
...@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, ...@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
goto out; goto out;
} }
if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
int hpage_node = node;
/*
* For hugepage allocation and non-interleave policy which
* allows the current node (or other explicitly preferred
* node) we only try to allocate from the current/preferred
* node and don't fall back to other nodes, as the cost of
* remote accesses would likely offset THP benefits.
*
* If the policy is interleave, or does not allow the current
* node in its nodemask, we allocate the standard way.
*/
if (pol->mode == MPOL_PREFERRED &&
!(pol->flags & MPOL_F_LOCAL))
hpage_node = pol->v.preferred_node;
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
page = __alloc_pages_node(hpage_node,
gfp | __GFP_THISNODE, order);
goto out;
}
}
nmask = policy_nodemask(gfp, pol); nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node); preferred_nid = policy_node(gfp, pol, node);
page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
......
...@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, ...@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
shmem_pseudo_vma_init(&pvma, info, hindex); shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
shmem_pseudo_vma_destroy(&pvma); shmem_pseudo_vma_destroy(&pvma);
if (page) if (page)
prep_transhuge_page(page); prep_transhuge_page(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment