Diffstat (limited to '0005-btrfs-shrink-delalloc-pages-instead-of-full-inodes.patch')
-rw-r--r--  0005-btrfs-shrink-delalloc-pages-instead-of-full-inodes.patch | 184
1 file changed, 184 insertions, 0 deletions
diff --git a/0005-btrfs-shrink-delalloc-pages-instead-of-full-inodes.patch b/0005-btrfs-shrink-delalloc-pages-instead-of-full-inodes.patch
new file mode 100644
index 0000000..0d02fab
--- /dev/null
+++ b/0005-btrfs-shrink-delalloc-pages-instead-of-full-inodes.patch
@@ -0,0 +1,184 @@
+From 0397aa4e6205543d9b3da11794037e94f8735867 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 4 Jan 2021 15:24:11 -0500
+Subject: btrfs: shrink delalloc pages instead of full inodes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Commit 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in
+shrink_delalloc") cleaned up how we do delalloc shrinking by utilizing
+some infrastructure we have in place to flush inodes that we use for
+device replace and snapshot. However this introduced a pretty serious
+performance regression. The root cause is because before we would
+generally use the normal writeback path to reclaim delalloc space, and
+for this we would provide it with the number of pages we wanted to
+flush. The referenced commit changed this to flush that many inodes,
+which drastically increased the amount of space we were flushing in
+certain cases, which severely affected performance.
+
+We cannot revert this patch unfortunately, because Filipe has another
+fix that requires the ability to skip flushing inodes that are being
+cloned in certain scenarios, which means we need to keep using our
+flushing infrastructure or risk re-introducing the deadlock.
+
+Instead to fix this problem we can go back to providing
+btrfs_start_delalloc_roots with a number of pages to flush, and then set
+up a writeback_control and utilize sync_inode() to handle the flushing
+for us. This gives us the same behavior we had prior to the fix, while
+still allowing us to avoid the deadlock that was fixed by Filipe. The
+user reported reproducer was a simple untarring of a large tarball of
+the source code for Firefox. The results are as follows
+
+5.9	0m54.258s
+5.10	1m26.212s
+Patch	0m38.800s
+
+We are significantly faster because of the work I did around improving
+ENOSPC flushing in 5.10 and 5.11, so reverting to the previous write out
+behavior gave us a pretty big boost.
+
+CC: stable@vger.kernel.org # 5.10
+Reported-by: René Rebe <rene@exactcode.de>
+Fixes: 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc")
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+---
+ fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++++++++---------------
+ fs/btrfs/space-info.c |  4 +++-
+ 2 files changed, 46 insertions(+), 18 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 07479250221d..acc47e2ffb46 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -9389,7 +9389,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
+  * some fairly slow code that needs optimization. This walks the list
+  * of all the inodes with pending delalloc and forces them to disk.
+  */
+-static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
++static int start_delalloc_inodes(struct btrfs_root *root,
++				 struct writeback_control *wbc, bool snapshot,
+ 				 bool in_reclaim_context)
+ {
+ 	struct btrfs_inode *binode;
+@@ -9398,6 +9399,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
+ 	struct list_head works;
+ 	struct list_head splice;
+ 	int ret = 0;
++	bool full_flush = wbc->nr_to_write == LONG_MAX;
+ 
+ 	INIT_LIST_HEAD(&works);
+ 	INIT_LIST_HEAD(&splice);
+@@ -9426,18 +9428,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
+ 		if (snapshot)
+ 			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
+ 				&binode->runtime_flags);
+-		work = btrfs_alloc_delalloc_work(inode);
+-		if (!work) {
+-			iput(inode);
+-			ret = -ENOMEM;
+-			goto out;
+-		}
+-		list_add_tail(&work->list, &works);
+-		btrfs_queue_work(root->fs_info->flush_workers,
+-				 &work->work);
+-		if (*nr != U64_MAX) {
+-			(*nr)--;
+-			if (*nr == 0)
++		if (full_flush) {
++			work = btrfs_alloc_delalloc_work(inode);
++			if (!work) {
++				iput(inode);
++				ret = -ENOMEM;
++				goto out;
++			}
++			list_add_tail(&work->list, &works);
++			btrfs_queue_work(root->fs_info->flush_workers,
++					 &work->work);
++		} else {
++			ret = sync_inode(inode, wbc);
++			if (!ret &&
++			    test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
++				     &BTRFS_I(inode)->runtime_flags))
++				ret = sync_inode(inode, wbc);
++			btrfs_add_delayed_iput(inode);
++			if (ret || wbc->nr_to_write <= 0)
+ 				goto out;
+ 		}
+ 		cond_resched();
+@@ -9463,18 +9471,29 @@ out:
+ 
+ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+ {
++	struct writeback_control wbc = {
++		.nr_to_write = LONG_MAX,
++		.sync_mode = WB_SYNC_NONE,
++		.range_start = 0,
++		.range_end = LLONG_MAX,
++	};
+ 	struct btrfs_fs_info *fs_info = root->fs_info;
+-	u64 nr = U64_MAX;
+ 
+ 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ 		return -EROFS;
+ 
+-	return start_delalloc_inodes(root, &nr, true, false);
++	return start_delalloc_inodes(root, &wbc, true, false);
+ }
+ 
+ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ 			       bool in_reclaim_context)
+ {
++	struct writeback_control wbc = {
++		.nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr,
++		.sync_mode = WB_SYNC_NONE,
++		.range_start = 0,
++		.range_end = LLONG_MAX,
++	};
+ 	struct btrfs_root *root;
+ 	struct list_head splice;
+ 	int ret;
+@@ -9488,6 +9507,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ 	spin_lock(&fs_info->delalloc_root_lock);
+ 	list_splice_init(&fs_info->delalloc_roots, &splice);
+ 	while (!list_empty(&splice) && nr) {
++		/*
++		 * Reset nr_to_write here so we know that we're doing a full
++		 * flush.
++		 */
++		if (nr == U64_MAX)
++			wbc.nr_to_write = LONG_MAX;
++
+ 		root = list_first_entry(&splice, struct btrfs_root,
+ 					delalloc_root);
+ 		root = btrfs_grab_root(root);
+@@ -9496,9 +9522,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ 			       &fs_info->delalloc_roots);
+ 		spin_unlock(&fs_info->delalloc_root_lock);
+ 
+-		ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
++		ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
+ 		btrfs_put_root(root);
+-		if (ret < 0)
++		if (ret < 0 || wbc.nr_to_write <= 0)
+ 			goto out;
+ 		spin_lock(&fs_info->delalloc_root_lock);
+ 	}
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 67e55c5479b8..e8347461c8dd 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
+ 
+ 	loops = 0;
+ 	while ((delalloc_bytes || dio_bytes) && loops < 3) {
+-		btrfs_start_delalloc_roots(fs_info, items, true);
++		u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
++
++		btrfs_start_delalloc_roots(fs_info, nr_pages, true);
+ 
+ 		loops++;
+ 		if (wait_ordered && !trans) {
+-- 
+cgit v1.2.3-1-gf6bb5
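
For context, the mechanism the patch leans on: btrfs_start_delalloc_roots() now takes a page count, turns it into a writeback_control whose nr_to_write acts as a shared page budget, and start_delalloc_inodes() calls sync_inode() per inode until that budget is spent (the wbc.nr_to_write <= 0 checks above). A minimal sketch of the same idea follows, assuming 5.10-era kernel APIs where sync_inode() is still available; the helper name is invented for illustration and is not part of the patch.

/*
 * Illustrative sketch only; assumes 5.10-era APIs (sync_inode() exists).
 * The patch open-codes this idea inside start_delalloc_inodes() and shares
 * one writeback_control across all delalloc inodes of all roots.
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/writeback.h>

/* Flush up to nr_pages of one inode's dirty (delalloc) pages. */
static int flush_delalloc_pages_bounded(struct inode *inode, long nr_pages)
{
	struct writeback_control wbc = {
		.nr_to_write = nr_pages,	/* page budget, not an inode count */
		.sync_mode   = WB_SYNC_NONE,	/* start writeback, do not wait on it */
		.range_start = 0,
		.range_end   = LLONG_MAX,	/* whole file */
	};

	/* sync_inode() writes dirty pages and decrements wbc.nr_to_write as it goes. */
	return sync_inode(inode, &wbc);
}

Because the budget is decremented as pages are written, a caller walking a list of inodes can stop as soon as nr_to_write drops to zero or below, which is exactly what restores the pre-regression "flush N pages" behavior instead of "flush N whole inodes".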