mm, memory_hotplug: remove timeout from __offline_memory
We have had a hardcoded 120s timeout, after which the memory offline fails, basically ever since hot remove was introduced. This is essentially a policy implemented in the kernel. Moreover, there is no way to adjust the timeout, and so we sometimes face memory offline failures if the system is under heavy memory pressure or a very intensive CPU workload on large machines. It is not very clear what purpose the timeout actually serves. The offline operation is interruptible by a signal, so if userspace wants timeout-based termination, this can be done trivially by sending a signal. If there is a strong use case for doing this from the kernel, then we should do it properly and make it tunable from userspace, with the timeout disabled by default, along with an explanation of who uses it and for what purpose. Link: http://lkml.kernel.org/r/20170918070834.13083-3-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Reza Arbab <arbab@linux.vnet.ibm.com> Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
72b39cfc4d
commit
ecde0f3e7f
|
@ -1590,9 +1590,9 @@ static void node_states_clear_node(int node, struct memory_notify *arg)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __ref __offline_pages(unsigned long start_pfn,
|
static int __ref __offline_pages(unsigned long start_pfn,
|
||||||
unsigned long end_pfn, unsigned long timeout)
|
unsigned long end_pfn)
|
||||||
{
|
{
|
||||||
unsigned long pfn, nr_pages, expire;
|
unsigned long pfn, nr_pages;
|
||||||
long offlined_pages;
|
long offlined_pages;
|
||||||
int ret, node;
|
int ret, node;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
@ -1630,12 +1630,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
|
||||||
goto failed_removal;
|
goto failed_removal;
|
||||||
|
|
||||||
pfn = start_pfn;
|
pfn = start_pfn;
|
||||||
expire = jiffies + timeout;
|
|
||||||
repeat:
|
repeat:
|
||||||
/* start memory hot removal */
|
/* start memory hot removal */
|
||||||
ret = -EBUSY;
|
|
||||||
if (time_after(jiffies, expire))
|
|
||||||
goto failed_removal;
|
|
||||||
ret = -EINTR;
|
ret = -EINTR;
|
||||||
if (signal_pending(current))
|
if (signal_pending(current))
|
||||||
goto failed_removal;
|
goto failed_removal;
|
||||||
|
@ -1708,7 +1704,7 @@ failed_removal:
|
||||||
/* Must be protected by mem_hotplug_begin() or a device_lock */
|
/* Must be protected by mem_hotplug_begin() or a device_lock */
|
||||||
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
||||||
{
|
{
|
||||||
return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
|
return __offline_pages(start_pfn, start_pfn + nr_pages);
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue