diff --git a/arch/riscv/configs/tencent.config b/arch/riscv/configs/tencent.config index 88bc831f447e..776fe6f23db7 100644 --- a/arch/riscv/configs/tencent.config +++ b/arch/riscv/configs/tencent.config @@ -140,7 +140,6 @@ CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y CONFIG_DAMON_RECLAIM=y CONFIG_DAMON_LRU_SORT=y -CONFIG_PAGECACHE_LIMIT=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6acda1a95807..fb165b7fc01f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -55,6 +55,8 @@ enum memcg_memory_event { MEMCG_SWAP_HIGH, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, + MEMCG_PAGECACHE_MAX, + MEMCG_PAGECACHE_OOM, MEMCG_NR_MEMORY_EVENTS, }; @@ -237,6 +239,10 @@ struct mem_cgroup { struct page_counter memsw; /* v1 only */ }; + struct page_counter pagecache; + u64 pagecache_reclaim_ratio; + u32 pagecache_max_ratio; + /* Legacy consumer-oriented counters */ struct page_counter kmem; /* v1 only */ struct page_counter tcpmem; /* v1 only */ @@ -403,6 +409,21 @@ struct mem_cgroup { */ #define MEMCG_CHARGE_BATCH 64U +/* + * Iteration constructs for visiting all cgroups (under a tree). If + * loops are exited prematurely (break), mem_cgroup_iter_break() must + * be used for reference counting. + */ +#define for_each_mem_cgroup_tree(iter, root) \ + for (iter = mem_cgroup_iter(root, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(root, iter, NULL)) + +#define for_each_mem_cgroup(iter) \ + for (iter = mem_cgroup_iter(NULL, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(NULL, iter, NULL)) + extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { @@ -1841,6 +1862,12 @@ int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); + +extern int sysctl_vm_memory_qos; +extern unsigned int vm_pagecache_limit_retry_times; +extern void +mem_cgroup_shrink_pagecache(struct mem_cgroup *memcg, gfp_t gfp_mask); + #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ab397f104240..758fa26cdd07 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -847,7 +847,6 @@ struct zone { */ long lowmem_reserve[MAX_NR_ZONES]; -#ifdef CONFIG_PAGECACHE_LIMIT /* * This atomic counter is set when there is pagecache limit * reclaim going on on this particular zone. Other potential @@ -855,7 +854,6 @@ struct zone { * bouncing. 
*/ atomic_t pagecache_reclaim; -#endif #ifdef CONFIG_NUMA int node; diff --git a/include/linux/swap.h b/include/linux/swap.h index fd45d18b416c..556339b5b9f3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -407,15 +407,19 @@ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -#ifdef CONFIG_PAGECACHE_LIMIT +#define ADDITIONAL_RECLAIM_RATIO 2 extern int vm_pagecache_limit_ratio; extern int vm_pagecache_limit_reclaim_ratio; extern unsigned long vm_pagecache_limit_pages; extern unsigned long vm_pagecache_limit_reclaim_pages; extern unsigned int vm_pagecache_ignore_dirty; extern unsigned int vm_pagecache_limit_async; +extern unsigned int vm_pagecache_limit_global; extern unsigned int vm_pagecache_ignore_slab; +extern long shrink_page_cache_memcg(gfp_t mask, struct mem_cgroup *memcg, + unsigned long nr_pages); +extern unsigned long __pagecache_over_limit(void); extern unsigned long pagecache_over_limit(void); extern int kpagecache_limitd_run(void); extern void kpagecache_limitd_stop(void); @@ -424,15 +428,6 @@ static inline bool pagecache_limit_should_shrink(void) { return unlikely(vm_pagecache_limit_pages) && pagecache_over_limit(); } -#else -extern inline void shrink_page_cache(gfp_t mask, struct page *page) -{ -} -static inline bool pagecache_limit_should_shrink(void) -{ - return 0; -} -#endif #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 04bf387f126e..bfda2c9871f9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -87,6 +87,8 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); int netcls_do_large_bitmap(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +int proc_pagecache_system_usage(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); int proc_do_static_key(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1589b6dd1818..e118fa269bbb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -592,8 +592,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, buffer, lenp, ppos, conv, data); } -#ifdef CONFIG_PAGECACHE_LIMIT -#define ADDITIONAL_RECLAIM_RATIO 2 static int setup_pagecache_limit(void) { /* reclaim ADDITIONAL_RECLAIM_PAGES more than limit. 
*/ @@ -661,7 +659,6 @@ static int pc_limit_async_handler(struct ctl_table *table, int write, return ret; } -#endif /* CONFIG_PAGECACHE_LIMIT */ static int do_proc_douintvec_w(unsigned int *tbl_data, struct ctl_table *table, @@ -2609,6 +2606,8 @@ static struct ctl_table kern_table[] = { { } }; +unsigned long vm_pagecache_system_usage; + static struct ctl_table vm_table[] = { { .procname = "overcommit_memory", @@ -2852,7 +2851,6 @@ static struct ctl_table vm_table[] = { .extra2 = (void *)&mmap_rnd_compat_bits_max, }, #endif -#ifdef CONFIG_PAGECACHE_LIMIT { .procname = "pagecache_limit_ratio", .data = &vm_pagecache_limit_ratio, @@ -2892,8 +2890,32 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#endif /* CONFIG_PAGECACHE_LIMIT */ #ifdef CONFIG_MEMCG + { + .procname = "pagecache_limit_global", + .data = &vm_pagecache_limit_global, + .maxlen = sizeof(vm_pagecache_limit_global), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "pagecache_limit_retry_times", + .data = &vm_pagecache_limit_retry_times, + .maxlen = sizeof(vm_pagecache_limit_retry_times), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, + }, + { + .procname = "pagecache_system_usage", + .data = &vm_pagecache_system_usage, + .maxlen = sizeof(unsigned long), + .mode = 0444, + .proc_handler = proc_pagecache_system_usage, + }, { .procname = "memory_qos", .data = &sysctl_vm_memory_qos, diff --git a/mm/Kconfig b/mm/Kconfig index 9a5cbcc84873..5ba91156e46f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1271,14 +1271,6 @@ config LOCK_MM_AND_FIND_VMA source "mm/damon/Kconfig" -config PAGECACHE_LIMIT - bool "Page cache limit" - help - This options allow user to set a limit for the page cache. - For details, see Documentation/mm/pagecache-limit. - - If unsure, say N. - config ENHANCED_MM bool "Enable enhanced mm support (EMM)" depends on MEMCG diff --git a/mm/filemap.c b/mm/filemap.c index ca878e126e05..7c2078ec803b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -848,6 +848,9 @@ noinline int __filemap_add_folio(struct address_space *mapping, int huge = folio_test_hugetlb(folio); bool charged = false; long nr = 1; +#ifdef CONFIG_MEMCG + struct mem_cgroup *memcg; +#endif VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); @@ -861,6 +864,14 @@ noinline int __filemap_add_folio(struct address_space *mapping, charged = true; xas_set_order(&xas, index, folio_order(folio)); nr = folio_nr_pages(folio); + +#ifdef CONFIG_MEMCG + /* For a successful charge, folio->memcg_data must be set. 
*/ + memcg = folio_memcg(folio); + + for (; memcg; memcg = parent_mem_cgroup(memcg)) + mem_cgroup_shrink_pagecache(memcg, gfp); +#endif } gfp &= GFP_RECLAIM_MASK; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c74899005708..8486580b23c1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -112,6 +112,11 @@ static bool cgroup_memory_nobpf __ro_after_init; static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); #endif +#define MEMCG_PAGECACHE_RETRIES 20 +#define DEFAULT_PAGE_RECLAIM_RATIO 5 +#define PAGECACHE_MAX_RATIO_MIN 5 +#define PAGECACHE_MAX_RATIO_MAX 100 + int sysctl_vm_memory_qos; /* default has none reclaim priority */ int sysctl_vm_qos_highest_reclaim_prio = CGROUP_PRIORITY_MAX; @@ -254,21 +259,6 @@ enum res_type { #define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) -/* - * Iteration constructs for visiting all cgroups (under a tree). If - * loops are exited prematurely (break), mem_cgroup_iter_break() must - * be used for reference counting. - */ -#define for_each_mem_cgroup_tree(iter, root) \ - for (iter = mem_cgroup_iter(root, NULL, NULL); \ - iter != NULL; \ - iter = mem_cgroup_iter(root, iter, NULL)) - -#define for_each_mem_cgroup(iter) \ - for (iter = mem_cgroup_iter(NULL, NULL, NULL); \ - iter != NULL; \ - iter = mem_cgroup_iter(NULL, iter, NULL)) - static inline bool task_is_dying(void) { return tsk_is_oom_victim(current) || fatal_signal_pending(current) || @@ -890,6 +880,13 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); memcg_rstat_updated(memcg, val); + + if (idx == NR_FILE_PAGES) { + if (val > 0) + page_counter_charge(&memcg->pagecache, val); + else + page_counter_uncharge(&memcg->pagecache, -val); + } memcg_stats_unlock(); } @@ -4005,6 +4002,8 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, } #endif +static void pagecache_set_limit(struct mem_cgroup *memcg); + static DEFINE_MUTEX(memcg_max_mutex); static int mem_cgroup_resize_max(struct mem_cgroup *memcg, @@ -4062,6 +4061,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg, if (enlarge) memcg_oom_recover(memcg); + pagecache_set_limit(memcg); } return ret; @@ -4206,6 +4206,134 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, return -EINVAL; } +#define MIN_PAGECACHE_PAGES 16 +unsigned int +vm_pagecache_limit_retry_times __read_mostly = MEMCG_PAGECACHE_RETRIES; + +void mem_cgroup_shrink_pagecache(struct mem_cgroup *memcg, gfp_t gfp_mask) +{ + long pages_reclaimed; + unsigned long pages_used, pages_max, goal_pages_used, pre_used; + unsigned int retry_times = 0; + unsigned int limit_retry_times; + u32 max_ratio; + + if (!sysctl_vm_memory_qos || vm_pagecache_limit_global) + return; + + if (!memcg || mem_cgroup_is_root(memcg)) + return; + + max_ratio = READ_ONCE(memcg->pagecache_max_ratio); + if (max_ratio == PAGECACHE_MAX_RATIO_MAX) + return; + + pages_max = READ_ONCE(memcg->pagecache.max); + if (pages_max == PAGE_COUNTER_MAX) + return; + + if (unlikely(task_is_dying())) + return; + + if (unlikely(current->flags & PF_MEMALLOC)) + return; + + if (unlikely(task_in_memcg_oom(current))) + return; + + if (!gfpflags_allow_blocking(gfp_mask)) + return; + + pages_used = page_counter_read(&memcg->pagecache); + limit_retry_times = READ_ONCE(vm_pagecache_limit_retry_times); + goal_pages_used = (100 - READ_ONCE(memcg->pagecache_reclaim_ratio)) + * pages_max / 100; + goal_pages_used = max_t(unsigned long, MIN_PAGECACHE_PAGES, + goal_pages_used); + 
+ if (pages_used > pages_max) + memcg_memory_event(memcg, MEMCG_PAGECACHE_MAX); + + while (pages_used > goal_pages_used) { + if (fatal_signal_pending(current)) + break; + + pre_used = pages_used; + pages_reclaimed = shrink_page_cache_memcg(gfp_mask, memcg, + pages_used - goal_pages_used); + + if (pages_reclaimed == -EINVAL) + return; + + if (limit_retry_times == 0) + goto next_shrink; + + if (pages_reclaimed == 0) { + io_schedule_timeout(HZ/10); + retry_times++; + } else + retry_times = 0; + + if (retry_times > limit_retry_times) { + pr_warn("pagecache limit: failed to reclaim enough page cache pages after repeated retries.\n"); + break; + } + +next_shrink: + pages_used = page_counter_read(&memcg->pagecache); + cond_resched(); + } +} + +static u64 pagecache_reclaim_ratio_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return memcg->pagecache_reclaim_ratio; +} + +static ssize_t pagecache_reclaim_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + u64 reclaim_ratio; + int ret; + unsigned long nr_pages; + + if (!sysctl_vm_memory_qos) { + pr_warn("vm.memory_qos must be enabled first.\n"); + return -EINVAL; + } + + if (vm_pagecache_limit_global) { + pr_warn("vm.pagecache_limit_global must be disabled first.\n"); + return -EINVAL; + } + + buf = strstrip(buf); + if (!buf) + return -EINVAL; + + ret = kstrtou64(buf, 0, &reclaim_ratio); + if (ret) + return ret; + + if ((reclaim_ratio > 0) && (reclaim_ratio < 100)) { + memcg->pagecache_reclaim_ratio = reclaim_ratio; + mem_cgroup_shrink_pagecache(memcg, GFP_KERNEL); + return nbytes; + } else if (reclaim_ratio == 100) { + nr_pages = page_counter_read(&memcg->pagecache); + + /* try to reclaim once */ + shrink_page_cache_memcg(GFP_KERNEL, memcg, nr_pages); + return nbytes; + } + + return -EINVAL; +} + static u64 mem_cgroup_priority_oom_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -4226,6 +4354,134 @@ static int mem_cgroup_priority_oom_write(struct cgroup_subsys_state *css, return 0; } +static u64 pagecache_current_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return (u64)page_counter_read(&memcg->pagecache) * PAGE_SIZE; +} + +static u64 memory_pagecache_max_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return memcg->pagecache_max_ratio; +} + +unsigned long mem_cgroup_pagecache_get_reclaim_pages(struct mem_cgroup *memcg) +{ + unsigned long goal_pages_used, pages_used, pages_max; + + if ((!memcg) || (mem_cgroup_is_root(memcg))) + return 0; + + pages_max = READ_ONCE(memcg->pagecache.max); + if (pages_max == PAGE_COUNTER_MAX) + return 0; + + goal_pages_used = (100 - READ_ONCE(memcg->pagecache_reclaim_ratio)) + * pages_max / 100; + goal_pages_used = max_t(unsigned long, MIN_PAGECACHE_PAGES, + goal_pages_used); + pages_used = page_counter_read(&memcg->pagecache); + + return pages_used > pages_max ?
pages_used - goal_pages_used : 0; +} + +static void pagecache_set_limit(struct mem_cgroup *memcg) +{ + unsigned long max, pages_max; + u32 max_ratio; + + pages_max = READ_ONCE(memcg->memory.max); + max_ratio = READ_ONCE(memcg->pagecache_max_ratio); + max = ((pages_max * max_ratio) / 100); + xchg(&memcg->pagecache.max, max); +} + +static ssize_t memory_pagecache_max_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_reclaims = vm_pagecache_limit_retry_times; + unsigned long max; + long pages_reclaimed; + int ret = 0; + u64 max_ratio, old; + + if (!sysctl_vm_memory_qos) { + pr_warn("vm.memory_qos must be enabled first.\n"); + return -EINVAL; + } + + if (vm_pagecache_limit_global) { + pr_warn("vm.pagecache_limit_global must be disabled first.\n"); + return -EINVAL; + } + + if (!buf) + return -EINVAL; + + ret = kstrtou64(buf, 0, &max_ratio); + if (ret) + return ret; + + if (max_ratio > PAGECACHE_MAX_RATIO_MAX || + max_ratio < PAGECACHE_MAX_RATIO_MIN) + return -EINVAL; + + if (READ_ONCE(memcg->memory.max) == PAGE_COUNTER_MAX) { + pr_warn("pagecache limit not allowed for cgroup without memory limit set\n"); + return -EPERM; + } + + old = READ_ONCE(memcg->pagecache_max_ratio); + memcg->pagecache_max_ratio = max_ratio; + pagecache_set_limit(memcg); + max = READ_ONCE(memcg->pagecache.max); + + for (;;) { + unsigned long pages_used = page_counter_read(&memcg->pagecache); + + if (pages_used <= max) + break; + + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (nr_reclaims) { + pages_reclaimed = + shrink_page_cache_memcg(GFP_KERNEL, memcg, + mem_cgroup_pagecache_get_reclaim_pages(memcg)); + + if (pages_reclaimed == -EINVAL) { + pr_warn("vm.pagecache_limit_global must be disabled first.\n"); + return -EINVAL; + } + + if (pages_reclaimed == 0) { + io_schedule_timeout(HZ/10); + nr_reclaims--; + cond_resched(); + } else + nr_reclaims = vm_pagecache_limit_retry_times; + + continue; + } + + memcg->pagecache_max_ratio = old; + pagecache_set_limit(memcg); + pr_warn("pagecache limit: failed to reclaim enough page cache pages after repeated retries.\n"); + return -EINVAL; + } + + return ret ?
: nbytes; +} + static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { unsigned long val; @@ -6129,6 +6385,23 @@ static struct cftype mem_cgroup_legacy_files[] = { .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, + { + .name = "pagecache.reclaim_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = pagecache_reclaim_ratio_read, + .write = pagecache_reclaim_ratio_write, + }, + { + .name = "pagecache.max_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = memory_pagecache_max_read, + .write = memory_pagecache_max_write, + }, + { + .name = "pagecache.current", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = pagecache_current_read, + }, { .name = "use_priority_oom", .write_u64 = mem_cgroup_priority_oom_write, @@ -6589,6 +6862,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX); + memcg->pagecache_reclaim_ratio = DEFAULT_PAGE_RECLAIM_RATIO; + memcg->pagecache_max_ratio = PAGECACHE_MAX_RATIO_MAX; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) memcg->zswap_max = PAGE_COUNTER_MAX; #endif @@ -6619,12 +6894,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) page_counter_init(&memcg->swap, &parent->swap); page_counter_init(&memcg->kmem, &parent->kmem); page_counter_init(&memcg->tcpmem, &parent->tcpmem); + page_counter_init(&memcg->pagecache, &parent->pagecache); } else { init_memcg_events(); page_counter_init(&memcg->memory, NULL); page_counter_init(&memcg->swap, NULL); page_counter_init(&memcg->kmem, NULL); page_counter_init(&memcg->tcpmem, NULL); + page_counter_init(&memcg->pagecache, NULL); } setup_async_wmark(memcg); @@ -6791,6 +7068,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX); page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX); page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX); + page_counter_set_max(&memcg->pagecache, PAGE_COUNTER_MAX); page_counter_set_min(&memcg->memory, 0); page_counter_set_low(&memcg->memory, 0); page_counter_set_async_high(&memcg->memory, PAGE_COUNTER_MAX); @@ -7857,6 +8135,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, setup_async_wmark(memcg); if (need_memcg_async_reclaim(memcg)) queue_work(memcg_async_reclaim_wq, &memcg->async_work); + pagecache_set_limit(memcg); memcg_wb_domain_size_changed(memcg); return nbytes; @@ -7872,6 +8151,10 @@ static void __memory_events_show(struct seq_file *m, atomic_long_t *events) atomic_long_read(&events[MEMCG_OOM_KILL])); seq_printf(m, "oom_group_kill %lu\n", atomic_long_read(&events[MEMCG_OOM_GROUP_KILL])); + seq_printf(m, "pagecache_max %lu\n", + atomic_long_read(&events[MEMCG_PAGECACHE_MAX])); + seq_printf(m, "pagecache_oom %lu\n", + atomic_long_read(&events[MEMCG_PAGECACHE_OOM])); } static int memory_events_show(struct seq_file *m, void *v) @@ -8114,6 +8397,23 @@ static ssize_t memory_async_wmark_delta_write(struct kernfs_open_file *of, } static struct cftype memory_files[] = { + { + .name = "pagecache.reclaim_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = pagecache_reclaim_ratio_read, + .write = pagecache_reclaim_ratio_write, + }, + { + .name = "pagecache.max_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = memory_pagecache_max_read, + .write = memory_pagecache_max_write, + }, + { + .name = "pagecache.current", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = pagecache_current_read, + }, { .name = "current", .flags = 
CFTYPE_NOT_ON_ROOT, diff --git a/mm/vmscan.c b/mm/vmscan.c index dd7061c8b495..688bd7474141 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "internal.h" #include "swap.h" @@ -505,6 +506,24 @@ static bool writeback_throttling_sane(struct scan_control *sc) return false; } #else + +#define sysctl_vm_memory_qos 0 + +/* + * Iteration constructs for visiting all cgroups (under a tree). If + * loops are exited prematurely (break), mem_cgroup_iter_break() must + * be used for reference counting. + */ +#define for_each_mem_cgroup_tree(iter, root) \ + for (iter = mem_cgroup_iter(root, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(root, iter, NULL)) + +#define for_each_mem_cgroup(iter) \ + for (iter = mem_cgroup_iter(NULL, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(NULL, iter, NULL)) + static int prealloc_memcg_shrinker(struct shrinker *shrinker) { return -ENOSYS; @@ -7400,69 +7419,97 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, EXPORT_SYMBOL_GPL(try_to_free_mem_cgroup_pages); #endif -#ifdef CONFIG_PAGECACHE_LIMIT int vm_pagecache_limit_ratio __read_mostly; int vm_pagecache_limit_reclaim_ratio __read_mostly; unsigned long vm_pagecache_limit_pages __read_mostly; unsigned long vm_pagecache_limit_reclaim_pages __read_mostly; unsigned int vm_pagecache_ignore_dirty __read_mostly = 1; unsigned int vm_pagecache_limit_async __read_mostly; +unsigned int vm_pagecache_limit_global __read_mostly; unsigned int vm_pagecache_ignore_slab __read_mostly = 1; static struct task_struct *kpclimitd; static bool kpclimitd_context; +extern unsigned long vm_pagecache_system_usage; + +unsigned long __pagecache_over_limit(void) +{ + unsigned long pgcache_lru_pages = 0; + /* + * We only want to limit unmapped and non-shmem page cache pages, + * normally all shmem pages are mapped as well. + */ + unsigned long pgcache_pages = global_node_page_state(NR_FILE_PAGES) + - max_t(unsigned long, + global_node_page_state(NR_FILE_MAPPED), + global_node_page_state(NR_SHMEM)); + + /* + * We certainly can't free more than what's on the LRU lists + * minus the dirty ones. + */ + if (vm_pagecache_ignore_slab) + pgcache_lru_pages = global_node_page_state(NR_ACTIVE_FILE) + + global_node_page_state(NR_INACTIVE_FILE); + else + pgcache_lru_pages = global_node_page_state(NR_ACTIVE_FILE) + + global_node_page_state(NR_INACTIVE_FILE) + + global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + + global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B); + + if (vm_pagecache_ignore_dirty != 0) + pgcache_lru_pages -= global_node_page_state(NR_FILE_DIRTY) / + vm_pagecache_ignore_dirty; + + /* Paranoia */ + if (unlikely(pgcache_lru_pages > LONG_MAX)) + return 0; + + /* Limit it to 94% of LRU (not all there might be unmapped). */ + pgcache_lru_pages -= pgcache_lru_pages / 16; + if (vm_pagecache_ignore_slab) + pgcache_pages = min_t(unsigned long, pgcache_pages, pgcache_lru_pages); + else + pgcache_pages = pgcache_lru_pages; + + return pgcache_pages; +} + +int proc_pagecache_system_usage(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + vm_pagecache_system_usage = __pagecache_over_limit(); + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} + /* * Returns a number that's positive if the pagecache is above - * the set limit + * the set limit. 
*/ unsigned long pagecache_over_limit(void) { unsigned long should_reclaim_pages = 0; unsigned long overlimit_pages = 0; unsigned long delta_pages = 0; - unsigned long pgcache_lru_pages = 0; - /* We only want to limit unmapped and non-shmem page cache pages; - * normally all shmem pages are mapped as well*/ - unsigned long pgcache_pages = global_node_page_state(NR_FILE_PAGES) - - max_t(unsigned long, - global_node_page_state(NR_FILE_MAPPED), - global_node_page_state(NR_SHMEM)); - /* We certainly can't free more than what's on the LRU lists - * minus the dirty ones*/ - if (vm_pagecache_ignore_slab) - pgcache_lru_pages = global_node_page_state(NR_ACTIVE_FILE) - + global_node_page_state(NR_INACTIVE_FILE); - else - pgcache_lru_pages = global_node_page_state(NR_ACTIVE_FILE) - + global_node_page_state(NR_INACTIVE_FILE) - + global_node_page_state(NR_SLAB_RECLAIMABLE_B) - + global_node_page_state(NR_SLAB_UNRECLAIMABLE_B); + unsigned long pgcache_pages = 0; - if (vm_pagecache_ignore_dirty != 0) - pgcache_lru_pages -= global_node_page_state(NR_FILE_DIRTY) - /vm_pagecache_ignore_dirty; - /* Paranoia */ - if (unlikely(pgcache_lru_pages > LONG_MAX)) - return 0; - - /* Limit it to 94% of LRU (not all there might be unmapped) */ - pgcache_lru_pages -= pgcache_lru_pages/16; - if (vm_pagecache_ignore_slab) - pgcache_pages = min_t(unsigned long, pgcache_pages, pgcache_lru_pages); - else - pgcache_pages = pgcache_lru_pages; + pgcache_pages = __pagecache_over_limit(); /* - *delta_pages: we should reclaim at least 2% more pages than overlimit_page, values get from - * /proc/vm/pagecache_limit_reclaim_pages - *should_reclaim_pages: the real pages we will reclaim, but it should less than pgcache_pages; - */ + * delta_pages: we should reclaim at least 2% more pages than overlimit_pages; + * the value is derived from vm_pagecache_limit_reclaim_pages. + * should_reclaim_pages: the number of pages we will actually reclaim, + * which should be less than pgcache_pages. + */ if (pgcache_pages > vm_pagecache_limit_pages) { overlimit_pages = pgcache_pages - vm_pagecache_limit_pages; delta_pages = vm_pagecache_limit_reclaim_pages - vm_pagecache_limit_pages; - should_reclaim_pages = min_t(unsigned long, delta_pages, vm_pagecache_limit_pages) + overlimit_pages; + should_reclaim_pages = min_t(unsigned long, delta_pages, vm_pagecache_limit_pages) + + overlimit_pages; return should_reclaim_pages; } + return 0; } @@ -7648,7 +7695,8 @@ out: * This function is similar to shrink_all_memory, except that it may never * swap out mapped pages and only does four passes. */ -static void __shrink_page_cache(gfp_t mask) +static unsigned long __shrink_page_cache(gfp_t mask, struct mem_cgroup *memcg, + unsigned long nr_pages) { unsigned long ret = 0; int pass = 0; @@ -7660,11 +7708,10 @@ static void __shrink_page_cache(gfp_t mask) .may_unmap = 0, .may_writepage = 0, .may_deactivate = DEACTIVATE_FILE, - .target_mem_cgroup = NULL, + .target_mem_cgroup = memcg, .reclaim_idx = MAX_NR_ZONES, }; struct reclaim_state *old_rs = current->reclaim_state; - long nr_pages; /* We might sleep during direct reclaim so make atomic context * is certainly a bug. */ BUG_ON(!(mask & __GFP_RECLAIM)); retry: - /* How many pages are we over the limit?*/ - nr_pages = pagecache_over_limit(); - /* * Return early if there's no work to do. * Wake up reclaimers that couldn't scan any zone due to congestion. * This makes sure that no sleeping reclaimer will stay behind.
* Allow breaching the limit if the task is on the way out. */ - if (nr_pages <= 0 || fatal_signal_pending(current)) { + if (nr_pages == 0 || fatal_signal_pending(current)) { wake_up_interruptible(&pagecache_reclaim_wq); goto out; } @@ -7719,9 +7763,10 @@ retry: goto out; for_each_online_node(nid) { - struct mem_cgroup *memcg = NULL; - while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL) - shrink_slab(mask, nid, memcg, sc.priority); + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, memcg) + shrink_slab(mask, nid, iter, sc.priority); } ret += reclaim_state.reclaimed; reclaim_state.reclaimed = 0; @@ -7741,8 +7786,11 @@ retry: out: current->reclaim_state = old_rs; + return sc.nr_reclaimed; } +void batch_shrink_page_cache(gfp_t mask); + static int kpagecache_limitd(void *data) { DEFINE_WAIT(wait); @@ -7755,7 +7803,9 @@ static int kpagecache_limitd(void *data) wake_up_interruptible(&pagecache_reclaim_wq); for (;;) { - __shrink_page_cache(GFP_KERNEL); + if (pagecache_limit_should_shrink()) + batch_shrink_page_cache(GFP_KERNEL); + prepare_to_wait(&kpagecache_limitd_wq, &wait, TASK_INTERRUPTIBLE); if (!kthread_should_stop()) @@ -7777,14 +7827,66 @@ static void wakeup_kpclimitd(gfp_t mask) wake_up_interruptible(&kpagecache_limitd_wq); } +void batch_shrink_page_cache(gfp_t mask) +{ + int reclaim_ratio, goal, retry_limit = 10, retry = 0; + unsigned long goals, currents, batchs, reclaims, reclaimed; + int tmp_reclaim_ratio = vm_pagecache_limit_reclaim_ratio; + int tmp_limit_ratio = vm_pagecache_limit_ratio; + + reclaim_ratio = max_t(int, tmp_reclaim_ratio - tmp_limit_ratio, + ADDITIONAL_RECLAIM_RATIO); + goal = tmp_limit_ratio - reclaim_ratio; + if (goal <= 0) + return; + + reclaims = reclaim_ratio * totalram_pages() / 100; + if (vm_pagecache_limit_async == 0) + batchs = reclaims / num_online_cpus(); + else + batchs = reclaims; + goals = goal * totalram_pages() / 100; + currents = __pagecache_over_limit(); + + while (currents > goals) { + if (fatal_signal_pending(current)) + break; + + reclaimed = __shrink_page_cache(mask, NULL, batchs); + if (reclaimed == 0) { + io_schedule_timeout(HZ/10); + retry++; + } else + retry = 0; + + if (retry > retry_limit) + break; + + currents = __pagecache_over_limit(); + cond_resched(); + } +} + void shrink_page_cache(gfp_t mask, struct page *page) { - if (0 == vm_pagecache_limit_async) - __shrink_page_cache(mask); + if (!sysctl_vm_memory_qos || !vm_pagecache_limit_global) + return; + + if (vm_pagecache_limit_async == 0) + batch_shrink_page_cache(mask); else wakeup_kpclimitd(mask); } +long shrink_page_cache_memcg(gfp_t mask, struct mem_cgroup *memcg, + unsigned long nr_pages) +{ + if (!vm_pagecache_limit_global) + return __shrink_page_cache(mask, memcg, nr_pages); + + return -EINVAL; +} + int kpagecache_limitd_run(void) { int ret = 0; @@ -7809,7 +7911,6 @@ void kpagecache_limitd_stop(void) kpclimitd = NULL; } } -#endif /* CONFIG_PAGECACHE_LIMIT */ static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc) { @@ -8053,6 +8154,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) .order = order, .may_unmap = 1, }; + unsigned long nr_pages; set_task_reclaim_state(current, &sc.reclaim_state); psi_memstall_enter(&pflags); @@ -8060,11 +8162,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) count_vm_event(PAGEOUTRUN); -#ifdef CONFIG_PAGECACHE_LIMIT /* This reclaims from all zones so don't count to sc.nr_reclaimed */ - if (pagecache_limit_should_shrink()) - 
__shrink_page_cache(GFP_KERNEL); -#endif /* CONFIG_PAGECACHE_LIMIT */ + if (pagecache_limit_should_shrink()) { + nr_pages = pagecache_over_limit(); + if (nr_pages) + shrink_page_cache(GFP_KERNEL, NULL); + } /* * Account for the reclaim boost. Note that the zone boost is left in