diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 5724db0a5d..075e85d121 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -894,6 +894,7 @@ extern int arc_lotsfree_percent; extern void arc_reduce_target_size(int64_t to_free); extern boolean_t arc_reclaim_needed(void); extern void arc_kmem_reap_soon(void); +extern boolean_t arc_is_overflowing(void); extern void arc_lowmem_init(void); extern void arc_lowmem_fini(void); diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index e34d4ae080..9ac4e3221c 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -268,6 +268,24 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc) */ if (pages > 0) { arc_reduce_target_size(ptob(sc->nr_to_scan)); + + /* + * Repeated calls to the arc shrinker can reduce arc_c + * drastically, potentially all the way to arc_c_min. While + * arc_c is below arc_size, ZFS can't process read/write + * requests, because arc_get_data_impl() will block. To + * ensure that arc_c doesn't shrink faster than the adjust + * thread can keep up, we wait for eviction here.
+ */ + mutex_enter(&arc_adjust_lock); + if (arc_is_overflowing()) { + arc_adjust_needed = B_TRUE; + zthr_wakeup(arc_adjust_zthr); + (void) cv_wait(&arc_adjust_waiters_cv, + &arc_adjust_lock); + } + mutex_exit(&arc_adjust_lock); + if (current_is_kswapd()) arc_kmem_reap_soon(); #ifdef HAVE_SPLIT_SHRINKER_CALLBACK diff --git a/module/zfs/arc.c b/module/zfs/arc.c index a11499703c..e75c1e453d 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -853,7 +853,6 @@ static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t); static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t); static void arc_access(arc_buf_hdr_t *, kmutex_t *); -static boolean_t arc_is_overflowing(void); static void arc_buf_watch(arc_buf_t *); static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); @@ -3995,6 +3994,15 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, multilist_sublist_unlock(mls); + /* + * If the ARC size is reduced from arc_c_max to arc_c_min (especially + * if the average cached block is small), eviction can be on-CPU for + * many seconds. To ensure that other threads that may be bound to + * this CPU are able to make progress, make a voluntary preemption + * call here. + */ + cond_resched(); + return (bytes_evicted); } @@ -4992,7 +5000,7 @@ arc_adapt(int bytes, arc_state_t *state) * Check if arc_size has grown past our upper threshold, determined by * zfs_arc_overflow_shift. */ -static boolean_t +boolean_t arc_is_overflowing(void) { /* Always allow at least one block of overflow */