Commit d882962f authored by Matthew L. Creech, committed by Artem Bityutskiy

UBIFS: handle allocation failures in UBIFS write path

Running kernel 2.6.37, my PPC-based device occasionally gets an
order-2 allocation failure in UBIFS, which causes the root FS to
become unwritable:

kswapd0: page allocation failure. order:2, mode:0x4050
Call Trace:
[c787dc30] [c00085b8] show_stack+0x7c/0x194 (unreliable)
[c787dc70] [c0061aec] __alloc_pages_nodemask+0x4f0/0x57c
[c787dd00] [c0061b98] __get_free_pages+0x20/0x50
[c787dd10] [c00e4f88] ubifs_jnl_write_data+0x54/0x200
[c787dd50] [c00e82d4] do_writepage+0x94/0x198
[c787dd90] [c00675e4] shrink_page_list+0x40c/0x77c
[c787de40] [c0067de0] shrink_inactive_list+0x1e0/0x370
[c787de90] [c0068224] shrink_zone+0x2b4/0x2b8
[c787df00] [c0068854] kswapd+0x408/0x5d4
[c787dfb0] [c0037bcc] kthread+0x80/0x84
[c787dff0] [c000ef44] kernel_thread+0x4c/0x68

Similar problems were encountered last April by Tomasz Stanislawski:

http://patchwork.ozlabs.org/patch/50965/

This patch implements Artem's suggested fix: fall back to a
mutex-protected static buffer, allocated at mount time.  I tested it
by forcing execution down the failure path, and didn't see any ill
effects.

Artem: massaged the patch a little, improved it so that we'd not
allocate the write reserve buffer when we are in R/O mode.
Signed-off-by: Matthew L. Creech <mlcreech@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent 2765df7d
...@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
{ {
struct ubifs_data_node *data; struct ubifs_data_node *data;
int err, lnum, offs, compr_type, out_len; int err, lnum, offs, compr_type, out_len;
int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_inode *ui = ubifs_inode(inode);
dbg_jnl("ino %lu, blk %u, len %d, key %s", dbg_jnl("ino %lu, blk %u, len %d, key %s",
...@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
DBGKEY(key)); DBGKEY(key));
ubifs_assert(len <= UBIFS_BLOCK_SIZE); ubifs_assert(len <= UBIFS_BLOCK_SIZE);
data = kmalloc(dlen, GFP_NOFS); data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
if (!data) if (!data) {
return -ENOMEM; /*
* Fall back to the write reserve buffer. Note, we might be
* currently on the memory reclaim path, when the kernel is
* trying to free some memory by writing out dirty pages. The
* write reserve buffer helps us to guarantee that we are
* always able to write the data.
*/
allocated = 0;
mutex_lock(&c->write_reserve_mutex);
data = c->write_reserve_buf;
}
data->ch.node_type = UBIFS_DATA_NODE; data->ch.node_type = UBIFS_DATA_NODE;
key_write(c, key, &data->key); key_write(c, key, &data->key);
...@@ -736,6 +746,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -736,6 +746,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
goto out_ro; goto out_ro;
finish_reservation(c); finish_reservation(c);
if (!allocated)
mutex_unlock(&c->write_reserve_mutex);
else
kfree(data); kfree(data);
return 0; return 0;
...@@ -745,6 +758,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -745,6 +758,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
ubifs_ro_mode(c, err); ubifs_ro_mode(c, err);
finish_reservation(c); finish_reservation(c);
out_free: out_free:
if (!allocated)
mutex_unlock(&c->write_reserve_mutex);
else
kfree(data); kfree(data);
return err; return err;
} }
......
...@@ -1213,6 +1213,13 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1213,6 +1213,13 @@ static int mount_ubifs(struct ubifs_info *c)
if (c->bulk_read == 1) if (c->bulk_read == 1)
bu_init(c); bu_init(c);
if (!c->ro_mount) {
c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
GFP_KERNEL);
if (!c->write_reserve_buf)
goto out_free;
}
c->mounting = 1; c->mounting = 1;
err = ubifs_read_superblock(c); err = ubifs_read_superblock(c);
...@@ -1482,6 +1489,7 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1482,6 +1489,7 @@ static int mount_ubifs(struct ubifs_info *c)
out_cbuf: out_cbuf:
kfree(c->cbuf); kfree(c->cbuf);
out_free: out_free:
kfree(c->write_reserve_buf);
kfree(c->bu.buf); kfree(c->bu.buf);
vfree(c->ileb_buf); vfree(c->ileb_buf);
vfree(c->sbuf); vfree(c->sbuf);
...@@ -1520,6 +1528,7 @@ static void ubifs_umount(struct ubifs_info *c) ...@@ -1520,6 +1528,7 @@ static void ubifs_umount(struct ubifs_info *c)
kfree(c->cbuf); kfree(c->cbuf);
kfree(c->rcvrd_mst_node); kfree(c->rcvrd_mst_node);
kfree(c->mst_node); kfree(c->mst_node);
kfree(c->write_reserve_buf);
kfree(c->bu.buf); kfree(c->bu.buf);
vfree(c->ileb_buf); vfree(c->ileb_buf);
vfree(c->sbuf); vfree(c->sbuf);
...@@ -1605,6 +1614,10 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1605,6 +1614,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
goto out; goto out;
} }
c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
if (!c->write_reserve_buf)
goto out;
err = ubifs_lpt_init(c, 0, 1); err = ubifs_lpt_init(c, 0, 1);
if (err) if (err)
goto out; goto out;
...@@ -1669,6 +1682,8 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1669,6 +1682,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
c->bgt = NULL; c->bgt = NULL;
} }
free_wbufs(c); free_wbufs(c);
kfree(c->write_reserve_buf);
c->write_reserve_buf = NULL;
vfree(c->ileb_buf); vfree(c->ileb_buf);
c->ileb_buf = NULL; c->ileb_buf = NULL;
ubifs_lpt_free(c, 1); ubifs_lpt_free(c, 1);
...@@ -1712,6 +1727,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) ...@@ -1712,6 +1727,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
free_wbufs(c); free_wbufs(c);
vfree(c->orph_buf); vfree(c->orph_buf);
c->orph_buf = NULL; c->orph_buf = NULL;
kfree(c->write_reserve_buf);
c->write_reserve_buf = NULL;
vfree(c->ileb_buf); vfree(c->ileb_buf);
c->ileb_buf = NULL; c->ileb_buf = NULL;
ubifs_lpt_free(c, 1); ubifs_lpt_free(c, 1);
...@@ -1942,6 +1959,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1942,6 +1959,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&c->mst_mutex); mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex); mutex_init(&c->umount_mutex);
mutex_init(&c->bu_mutex); mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
init_waitqueue_head(&c->cmt_wq); init_waitqueue_head(&c->cmt_wq);
c->buds = RB_ROOT; c->buds = RB_ROOT;
c->old_idx = RB_ROOT; c->old_idx = RB_ROOT;
......
...@@ -151,6 +151,12 @@ ...@@ -151,6 +151,12 @@
*/ */
#define WORST_COMPR_FACTOR 2 #define WORST_COMPR_FACTOR 2
/*
* How much memory is needed for a buffer where we compress a data node.
*/
#define COMPRESSED_DATA_NODE_BUF_SZ \
(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
/* Maximum expected tree height for use by bottom_up_buf */ /* Maximum expected tree height for use by bottom_up_buf */
#define BOTTOM_UP_HEIGHT 64 #define BOTTOM_UP_HEIGHT 64
...@@ -1005,6 +1011,11 @@ struct ubifs_debug_info; ...@@ -1005,6 +1011,11 @@ struct ubifs_debug_info;
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
* @bu: pre-allocated bulk-read information * @bu: pre-allocated bulk-read information
* *
* @write_reserve_mutex: protects @write_reserve_buf
* @write_reserve_buf: on the write path we allocate memory, which might
* sometimes be unavailable, in which case we use this
* write reserve buffer
*
* @log_lebs: number of logical eraseblocks in the log * @log_lebs: number of logical eraseblocks in the log
* @log_bytes: log size in bytes * @log_bytes: log size in bytes
* @log_last: last LEB of the log * @log_last: last LEB of the log
...@@ -1256,6 +1267,9 @@ struct ubifs_info { ...@@ -1256,6 +1267,9 @@ struct ubifs_info {
struct mutex bu_mutex; struct mutex bu_mutex;
struct bu_info bu; struct bu_info bu;
struct mutex write_reserve_mutex;
void *write_reserve_buf;
int log_lebs; int log_lebs;
long long log_bytes; long long log_bytes;
int log_last; int log_last;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment