btrfs: Switch memory allocations in async csum calculation path to kvmalloc
Recent multi-page biovec rework allowed creation of bios that can span large regions - up to 128 megabytes in the case of btrfs. OTOH btrfs' submission path currently allocates a contiguous array to store the checksums for every bio submitted. This means we can request up to (128mb / BTRFS_SECTOR_SIZE) * 4 bytes + 32bytes of memory from kmalloc. On busy systems with possibly fragmented memory said kmalloc can fail which will trigger BUG_ON due to improper error handling IO submission context in btrfs. Until error handling is improved or bios in btrfs limited to a more manageable size (e.g. 1m) let's use kvmalloc to fallback to vmalloc for such large allocations. There is no hard requirement that the memory allocated for checksums during IO submission has to be contiguous, but this is a simple fix that does not require several non-contiguous allocations. For small writes this is unlikely to have any visible effect since kmalloc will still satisfy allocation requests as usual. For larger requests the code will just fallback to vmalloc. We've performed evaluation on several workload types and there was no significant difference kmalloc vs kvmalloc. Signed-off-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
272e5326c7
commit
a3d46aea46
|
@ -7,6 +7,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
|||
unsigned long this_sum_bytes = 0;
|
||||
int i;
|
||||
u64 offset;
|
||||
unsigned nofs_flag;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
|
||||
GFP_KERNEL);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
|
||||
GFP_NOFS);
|
||||
if (!sums)
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
|
@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
|||
|
||||
bytes_left = bio->bi_iter.bi_size - total_bytes;
|
||||
|
||||
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
|
||||
GFP_NOFS);
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
|
||||
bytes_left), GFP_KERNEL);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
BUG_ON(!sums); /* -ENOMEM */
|
||||
sums->len = bytes_left;
|
||||
ordered = btrfs_lookup_ordered_extent(inode,
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "ctree.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
|
@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
|
|||
cur = entry->list.next;
|
||||
sum = list_entry(cur, struct btrfs_ordered_sum, list);
|
||||
list_del(&sum->list);
|
||||
kfree(sum);
|
||||
kvfree(sum);
|
||||
}
|
||||
kmem_cache_free(btrfs_ordered_extent_cache, entry);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue