Commit 80e675f9 authored by Eric Gouriou, committed by Theodore Ts'o

ext4: optimize memmove lengths in extent/index insertions

ext4_ext_insert_extent() (respectively ext4_ext_insert_index())
was using EXT_MAX_EXTENT() (resp. EXT_MAX_INDEX()) to determine
how many entries needed to be moved beyond the insertion point.
In practice this means that (320 - I) * 24 bytes were memmove()'d,
where I is the insertion point, rather than (#entries - I) * 24 bytes.
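
To make the arithmetic concrete, here is a small stand-alone sketch
(stand-in types and sizes, not the kernel's on-disk structures;
MAX_ENTRIES plays the role of EXT_MAX_EXTENT()/EXT_MAX_INDEX()):

#include <stdio.h>

/* Stand-in for an on-disk entry such as struct ext4_extent. */
struct entry { unsigned int block; unsigned int data[2]; };

#define MAX_ENTRIES 320         /* slots the block could hold */

int main(void)
{
        int nr_in_use = 12;     /* entries currently stored */
        int ins = 4;            /* insertion point I */

        /* Old: distance from I to the last *possible* slot. */
        size_t old_bytes = (MAX_ENTRIES - ins) * sizeof(struct entry);

        /* Needed: distance from I to the last *populated* slot. */
        size_t new_bytes = (nr_in_use - ins) * sizeof(struct entry);

        printf("old: %zu bytes, needed: %zu bytes\n", old_bytes, new_bytes);
        return 0;
}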

This patch uses EXT_LAST_EXTENT() (resp. EXT_LAST_INDEX()) instead
to only move existing entries. The code flow is also simplified
slightly to highlight similarities and reduce code duplication in
the insertion logic.
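
A minimal user-space sketch of that simplified flow, assuming a plain
sorted array and a hypothetical insert_entry() helper rather than the
kernel's extent-tree types: each branch now only picks the insertion
slot, and one shared memmove shifts the populated tail.

#include <string.h>

struct entry { unsigned int block; unsigned int data[2]; };

/* Hypothetical helper mirroring the patched flow: tbl holds
 * *nr_in_use sorted entries; nearest indexes the entry closest
 * to newent, as the path lookup provides in the kernel. */
static void insert_entry(struct entry *tbl, int *nr_in_use,
                         const struct entry *newent, int nearest)
{
        struct entry *ix;
        int len;

        if (newent->block > tbl[nearest].block)
                ix = &tbl[nearest] + 1;         /* insert after */
        else
                ix = &tbl[nearest];             /* insert before */

        /* One shared move of the populated tail only, the analogue
         * of EXT_LAST_INDEX(hdr) - ix + 1 in the patch. */
        len = (int)((tbl + *nr_in_use - 1) - ix + 1);
        if (len > 0)
                memmove(ix + 1, ix, len * sizeof(struct entry));

        *ix = *newent;
        (*nr_in_use)++;
}

When the new entry belongs at the very end, ix points one past the
last populated slot and len evaluates to zero, so the memmove is
skipped entirely; presumably this is why the append-heavy DIO
workload below benefits the most.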

This patch reduces system CPU consumption by over 25% on a 4kB
synchronous append DIO write workload when used with the
pre-2.6.39 x86_64 memmove() implementation. With the much faster
2.6.39 memmove() implementation we still see a decrease in
system CPU usage between 2% and 7%.

Note that the ext_debug() output changes with this patch, splitting
some log information between log entries. Users of the ext_debug() output
should note that the "move %d" units changed from reporting the number
of bytes moved to reporting the number of entries moved.
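
Concretely, in the index path shown in the first hunk below, the move
goes from a pre-scaled byte count to an entry count scaled at the call
site:

/* before: len already holds a byte count */
memmove(curp->p_idx + 1, curp->p_idx, len);

/* after: len counts entries and is scaled when the move is issued */
memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
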
Signed-off-by: Eric Gouriou <egouriou@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 6f91bc5f
fs/ext4/extents.c

@@ -754,31 +754,25 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                 return -EIO;
         }
 
-        len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
         if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
                 /* insert after */
-                if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
-                        len = (len - 1) * sizeof(struct ext4_extent_idx);
-                        len = len < 0 ? 0 : len;
-                        ext_debug("insert new index %d after: %llu. "
-                                        "move %d from 0x%p to 0x%p\n",
-                                        logical, ptr, len,
-                                        (curp->p_idx + 1), (curp->p_idx + 2));
-                        memmove(curp->p_idx + 2, curp->p_idx + 1, len);
-                }
+                ext_debug("insert new index %d after: %llu\n", logical, ptr);
                 ix = curp->p_idx + 1;
         } else {
                 /* insert before */
-                len = len * sizeof(struct ext4_extent_idx);
-                len = len < 0 ? 0 : len;
-                ext_debug("insert new index %d before: %llu. "
-                                "move %d from 0x%p to 0x%p\n",
-                                logical, ptr, len,
-                                curp->p_idx, (curp->p_idx + 1));
-                memmove(curp->p_idx + 1, curp->p_idx, len);
+                ext_debug("insert new index %d before: %llu\n", logical, ptr);
                 ix = curp->p_idx;
         }
 
+        len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
+        BUG_ON(len < 0);
+        if (len > 0) {
+                ext_debug("insert new index %d: "
+                                "move %d indices from 0x%p to 0x%p\n",
+                                logical, len, ix, ix + 1);
+                memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
+        }
+
         if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
                 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
                 return -EIO;
@@ -1779,41 +1773,46 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
                                 ext4_ext_pblock(newext),
                                 ext4_ext_is_uninitialized(newext),
                                 ext4_ext_get_actual_len(newext));
-                path[depth].p_ext = EXT_FIRST_EXTENT(eh);
-        } else if (le32_to_cpu(newext->ee_block)
+                nearex = EXT_FIRST_EXTENT(eh);
+        } else {
+                if (le32_to_cpu(newext->ee_block)
                            > le32_to_cpu(nearex->ee_block)) {
-                /* BUG_ON(newext->ee_block == nearex->ee_block); */
-                if (nearex != EXT_LAST_EXTENT(eh)) {
-                        len = EXT_MAX_EXTENT(eh) - nearex;
-                        len = (len - 1) * sizeof(struct ext4_extent);
-                        len = len < 0 ? 0 : len;
-                        ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
-                                        "move %d from 0x%p to 0x%p\n",
+                        /* Insert after */
+                        ext_debug("insert %d:%llu:[%d]%d %s before: "
+                                        "nearest 0x%p\n"
                                         le32_to_cpu(newext->ee_block),
                                         ext4_ext_pblock(newext),
                                         ext4_ext_is_uninitialized(newext),
                                         ext4_ext_get_actual_len(newext),
-                                        nearex, len, nearex + 1, nearex + 2);
-                        memmove(nearex + 2, nearex + 1, len);
+                                        nearex);
+                        nearex++;
+                } else {
+                        /* Insert before */
+                        BUG_ON(newext->ee_block == nearex->ee_block);
+                        ext_debug("insert %d:%llu:[%d]%d %s after: "
+                                        "nearest 0x%p\n"
+                                        le32_to_cpu(newext->ee_block),
+                                        ext4_ext_pblock(newext),
+                                        ext4_ext_is_uninitialized(newext),
+                                        ext4_ext_get_actual_len(newext),
+                                        nearex);
+                }
+                len = EXT_LAST_EXTENT(eh) - nearex + 1;
+                if (len > 0) {
+                        ext_debug("insert %d:%llu:[%d]%d: "
+                                        "move %d extents from 0x%p to 0x%p\n",
+                                        le32_to_cpu(newext->ee_block),
+                                        ext4_ext_pblock(newext),
+                                        ext4_ext_is_uninitialized(newext),
+                                        ext4_ext_get_actual_len(newext),
+                                        len, nearex, nearex + 1);
+                        memmove(nearex + 1, nearex,
+                                        len * sizeof(struct ext4_extent));
                 }
-                path[depth].p_ext = nearex + 1;
-        } else {
-                BUG_ON(newext->ee_block == nearex->ee_block);
-                len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
-                len = len < 0 ? 0 : len;
-                ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
-                                "move %d from 0x%p to 0x%p\n",
-                                le32_to_cpu(newext->ee_block),
-                                ext4_ext_pblock(newext),
-                                ext4_ext_is_uninitialized(newext),
-                                ext4_ext_get_actual_len(newext),
-                                nearex, len, nearex, nearex + 1);
-                memmove(nearex + 1, nearex, len);
-                path[depth].p_ext = nearex;
         }
 
         le16_add_cpu(&eh->eh_entries, 1);
-        nearex = path[depth].p_ext;
+        path[depth].p_ext = nearex;
         nearex->ee_block = newext->ee_block;
         ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
         nearex->ee_len = newext->ee_len;