diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index 8abaeb144e44..bb90de3885d1 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -234,8 +234,12 @@ will exist, of the form:: hugepages-${size}kB -Inside each of these directories, the same set of files will exist:: +Inside each of these directories, the set of files contained in ``/proc`` +will exist. In addition, two additional interfaces for demoting huge +pages may exist:: + demote + demote_size nr_hugepages nr_hugepages_mempolicy nr_overcommit_hugepages @@ -243,7 +247,29 @@ Inside each of these directories, the same set of files will exist:: resv_hugepages surplus_hugepages -which function as described above for the default huge page-sized case. +The demote interfaces provide the ability to split a huge page into +smaller huge pages. For example, the x86 architecture supports both +1GB and 2MB huge pages sizes. A 1GB huge page can be split into 512 +2MB huge pages. Demote interfaces are not available for the smallest +huge page size. The demote interfaces are: + +demote_size + is the size of demoted pages. When a page is demoted a corresponding + number of huge pages of demote_size will be created. By default, + demote_size is set to the next smaller huge page size. If there are + multiple smaller huge page sizes, demote_size can be set to any of + these smaller sizes. Only huge page sizes less than the current huge + pages size are allowed. + +demote + is used to demote a number of huge pages. A user with root privileges + can write to this file. It may not be possible to demote the + requested number of huge pages. To determine how many pages were + actually demoted, compare the value of nr_hugepages before and after + writing to the demote interface. demote is a write only interface. + +The interfaces which are the same as in ``/proc`` (all except demote and +demote_size) function as described above for the default huge page-sized case. .. _mem_policy_and_hp_alloc: diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3cbf60464398..2bddd6c38204 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -586,6 +586,7 @@ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; + unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d394d9545c4e..1a18ff2f0001 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2986,7 +2986,7 @@ free: static void __init hugetlb_init_hstates(void) { - struct hstate *h; + struct hstate *h, *h2; for_each_hstate(h) { if (minimum_order > huge_page_order(h)) @@ -2995,6 +2995,22 @@ static void __init hugetlb_init_hstates(void) /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); + + /* + * Set demote order for each hstate. Note that + * h->demote_order is initially 0. + * - We can not demote gigantic pages if runtime freeing + * is not supported, so skip this. + */ + if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) + continue; + for_each_hstate(h2) { + if (h2 == h) + continue; + if (h2->order < h->order && + h2->order > h->demote_order) + h->demote_order = h2->order; + } } VM_BUG_ON(minimum_order == UINT_MAX); } @@ -3235,9 +3251,31 @@ out: return 0; } +static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) + __must_hold(&hugetlb_lock) +{ + int rc = 0; + + lockdep_assert_held(&hugetlb_lock); + + /* We should never get here if no demote order */ + if (!h->demote_order) { + pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n"); + return -EINVAL; /* internal error */ + } + + /* + * TODO - demote fucntionality will be added in subsequent patch + */ + return rc; +} + #define HSTATE_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) +#define HSTATE_ATTR_WO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_WO(_name) + #define HSTATE_ATTR(_name) \ static struct kobj_attribute _name##_attr = \ __ATTR(_name, 0644, _name##_show, _name##_store) @@ -3433,6 +3471,105 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj, } HSTATE_ATTR_RO(surplus_hugepages); +static ssize_t demote_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + unsigned long nr_demote; + unsigned long nr_available; + nodemask_t nodes_allowed, *n_mask; + struct hstate *h; + int err = 0; + int nid; + + err = kstrtoul(buf, 10, &nr_demote); + if (err) + return err; + h = kobj_to_hstate(kobj, &nid); + + if (nid != NUMA_NO_NODE) { + init_nodemask_of_node(&nodes_allowed, nid); + n_mask = &nodes_allowed; + } else { + n_mask = &node_states[N_MEMORY]; + } + + /* Synchronize with other sysfs operations modifying huge pages */ + mutex_lock(&h->resize_lock); + spin_lock_irq(&hugetlb_lock); + + while (nr_demote) { + /* + * Check for available pages to demote each time thorough the + * loop as demote_pool_huge_page will drop hugetlb_lock. + * + * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock + * but will when full demote functionality is added in a later + * patch. + */ + if (nid != NUMA_NO_NODE) + nr_available = h->free_huge_pages_node[nid]; + else + nr_available = h->free_huge_pages; + nr_available -= h->resv_huge_pages; + if (!nr_available) + break; + + err = demote_pool_huge_page(h, n_mask); + if (err) + break; + + nr_demote--; + } + + spin_unlock_irq(&hugetlb_lock); + mutex_unlock(&h->resize_lock); + + if (err) + return err; + return len; +} +HSTATE_ATTR_WO(demote); + +static ssize_t demote_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int nid; + struct hstate *h = kobj_to_hstate(kobj, &nid); + unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; + + return sysfs_emit(buf, "%lukB\n", demote_size); +} + +static ssize_t demote_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct hstate *h, *demote_hstate; + unsigned long demote_size; + unsigned int demote_order; + int nid; + + demote_size = (unsigned long)memparse(buf, NULL); + + demote_hstate = size_to_hstate(demote_size); + if (!demote_hstate) + return -EINVAL; + demote_order = demote_hstate->order; + + /* demote order must be smaller than hstate order */ + h = kobj_to_hstate(kobj, &nid); + if (demote_order >= h->order) + return -EINVAL; + + /* resize_lock synchronizes access to demote size and writes */ + mutex_lock(&h->resize_lock); + h->demote_order = demote_order; + mutex_unlock(&h->resize_lock); + + return count; +} +HSTATE_ATTR(demote_size); + static struct attribute *hstate_attrs[] = { &nr_hugepages_attr.attr, &nr_overcommit_hugepages_attr.attr, @@ -3449,6 +3586,16 @@ static const struct attribute_group hstate_attr_group = { .attrs = hstate_attrs, }; +static struct attribute *hstate_demote_attrs[] = { + &demote_size_attr.attr, + &demote_attr.attr, + NULL, +}; + +static const struct attribute_group hstate_demote_attr_group = { + .attrs = hstate_demote_attrs, +}; + static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, struct kobject **hstate_kobjs, const struct attribute_group *hstate_attr_group) @@ -3466,6 +3613,12 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, hstate_kobjs[hi] = NULL; } + if (h->demote_order) { + if (sysfs_create_group(hstate_kobjs[hi], + &hstate_demote_attr_group)) + pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); + } + return retval; }