Commit 2761ece8 authored by Naohiro Aota, committed by David Sterba

btrfs: introduce offload_csum_mode to tweak checksum offloading behavior

We disable offloading checksum to workqueues and do it synchronously when
the checksum algorithm is fast. However, as reported in the link below,
RAID0 with multiple devices may suffer from the sync checksum, because
"fast checksum" is still not fast enough to catch up with RAID0 writing.

We don't have an effective way to determine whether to offload or not,
so for now add a sysfs knob so this can be debugged. This is intentionally
under CONFIG_BTRFS_DEBUG so it's not exposed to users, as it may be
removed again in the future.

Introduce fs_devices->offload_csum_mode, so that a btrfs developer can
change the behavior by writing to /sys/fs/btrfs/<uuid>/offload_csum. The
default is "auto" which is the same as the previous behavior. Or, you
can set "on" or "off" (or "y" or "n", or whatever kstrtobool() accepts) to
always/never offload checksum.

More benchmarks need to be collected with this knob to implement proper
criteria for enabling/disabling checksum offloading.

Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent b2324e08
...@@ -608,8 +608,20 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free) ...@@ -608,8 +608,20 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio) static bool should_async_write(struct btrfs_bio *bbio)
{ {
bool auto_csum_mode = true;
#ifdef CONFIG_BTRFS_DEBUG
struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
return false;
auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
#endif
/* Submit synchronously if the checksum implementation is fast. */ /* Submit synchronously if the checksum implementation is fast. */
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags)) if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false; return false;
/* /*
......
...@@ -1307,6 +1307,47 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj, ...@@ -1307,6 +1307,47 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show, BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store); btrfs_bg_reclaim_threshold_store);
#ifdef CONFIG_BTRFS_DEBUG
static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
switch (READ_ONCE(fs_devices->offload_csum_mode)) {
case BTRFS_OFFLOAD_CSUM_AUTO:
return sysfs_emit(buf, "auto\n");
case BTRFS_OFFLOAD_CSUM_FORCE_ON:
return sysfs_emit(buf, "1\n");
case BTRFS_OFFLOAD_CSUM_FORCE_OFF:
return sysfs_emit(buf, "0\n");
default:
WARN_ON(1);
return -EINVAL;
}
}
/*
 * sysfs store handler for the offload_csum attribute.
 *
 * Input that kstrtobool() parses selects FORCE_ON (true) or FORCE_OFF
 * (false).  Input that kstrtobool() rejects with -EINVAL but that matches
 * "auto" per sysfs_streq() selects AUTO.  Anything else is rejected.
 *
 * Returns len on success, -EINVAL on unrecognized input.
 */
static ssize_t btrfs_offload_csum_store(struct kobject *kobj,
					struct kobj_attribute *a, const char *buf,
					size_t len)
{
	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
	enum btrfs_offload_csum_mode new_mode;
	bool enable;
	int err;

	err = kstrtobool(buf, &enable);
	if (err == 0) {
		new_mode = enable ? BTRFS_OFFLOAD_CSUM_FORCE_ON :
				    BTRFS_OFFLOAD_CSUM_FORCE_OFF;
	} else if (err == -EINVAL && sysfs_streq(buf, "auto")) {
		/* Not a boolean, but the one extra keyword we accept. */
		new_mode = BTRFS_OFFLOAD_CSUM_AUTO;
	} else {
		return -EINVAL;
	}

	WRITE_ONCE(fs_devices->offload_csum_mode, new_mode);
	return len;
}
BTRFS_ATTR_RW(, offload_csum, btrfs_offload_csum_show, btrfs_offload_csum_store);
#endif
/* /*
* Per-filesystem information and stats. * Per-filesystem information and stats.
* *
...@@ -1326,6 +1367,9 @@ static const struct attribute *btrfs_attrs[] = { ...@@ -1326,6 +1367,9 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, bg_reclaim_threshold), BTRFS_ATTR_PTR(, bg_reclaim_threshold),
BTRFS_ATTR_PTR(, commit_stats), BTRFS_ATTR_PTR(, commit_stats),
BTRFS_ATTR_PTR(, temp_fsid), BTRFS_ATTR_PTR(, temp_fsid),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_ATTR_PTR(, offload_csum),
#endif
NULL, NULL,
}; };
......
...@@ -291,6 +291,25 @@ enum btrfs_read_policy { ...@@ -291,6 +291,25 @@ enum btrfs_read_policy {
BTRFS_NR_READ_POLICY, BTRFS_NR_READ_POLICY,
}; };
#ifdef CONFIG_BTRFS_DEBUG
/*
 * Where checksums are computed: handed off to workqueues, or calculated
 * synchronously in btrfs_submit_chunk().
 */
enum btrfs_offload_csum_mode {
	/*
	 * Decide automatically: compute synchronously when the checksum is
	 * fast, otherwise offload to workqueues.
	 */
	BTRFS_OFFLOAD_CSUM_AUTO,
	/* Offload checksumming to workqueues unconditionally. */
	BTRFS_OFFLOAD_CSUM_FORCE_ON,
	/* Compute checksums synchronously; never offload to workqueues. */
	BTRFS_OFFLOAD_CSUM_FORCE_OFF,
};
#endif
struct btrfs_fs_devices { struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
...@@ -395,6 +414,11 @@ struct btrfs_fs_devices { ...@@ -395,6 +414,11 @@ struct btrfs_fs_devices {
/* Policy used to read the mirrored stripes. */ /* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy; enum btrfs_read_policy read_policy;
#ifdef CONFIG_BTRFS_DEBUG
/* Checksum mode - offload it or do it synchronously. */
enum btrfs_offload_csum_mode offload_csum_mode;
#endif
}; };
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment