Commit 38e719ab authored by Baolin Wang's avatar Baolin Wang Committed by Linus Torvalds

hugetlb: support node specified when using cma for gigantic hugepages

Now the size of CMA area for gigantic hugepages runtime allocation is
balanced for all online nodes, but we also want to specify the size of
CMA per-node, or only one node in some cases, which are similar with
patch [1].

For example, on some multi-nodes systems, each node's memory can be
different, allocating the same size of CMA for each node is not suitable
for the low-memory nodes.  Meanwhile some workloads like DPDK mentioned
by Zhenguo in patch [1] only need hugepages in one node.

On the other hand, we have some machines with multiple types of memory,
like DRAM and PMEM (persistent memory).  On this system, we may want to
specify all the hugepages only on DRAM node, or specify the proportion
of DRAM node and PMEM node, to tuning the performance of the workloads.

Thus this patch adds node format for 'hugetlb_cma' parameter to support
specifying the size of CMA per-node.  An example is as follows:

  hugetlb_cma=0:5G,2:5G

which means allocating 5G size of CMA area on node 0 and node 2
respectively.  And the users should use the node specific sysfs file to
allocate the gigantic hugepages if specified the CMA size on that node.

Link: https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@gmail.com [1]
Link: https://lkml.kernel.org/r/bb790775ca60bb8f4b26956bb3f6988f74e075c7.1634261144.git.baolin.wang@linux.alibaba.comSigned-off-by: default avatarBaolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 12b61320
...@@ -1587,8 +1587,10 @@ ...@@ -1587,8 +1587,10 @@
registers. Default set by CONFIG_HPET_MMAP_DEFAULT. registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation
of gigantic hugepages. of gigantic hugepages. Or using node format, the size
Format: nn[KMGTPE] of a CMA area per node can be specified.
Format: nn[KMGTPE] or (node format)
<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
Reserve a CMA area of given size and allocate gigantic Reserve a CMA area of given size and allocate gigantic
hugepages using the CMA allocator. If enabled, the hugepages using the CMA allocator. If enabled, the
......
...@@ -50,6 +50,7 @@ struct hstate hstates[HUGE_MAX_HSTATE]; ...@@ -50,6 +50,7 @@ struct hstate hstates[HUGE_MAX_HSTATE];
#ifdef CONFIG_CMA #ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES]; static struct cma *hugetlb_cma[MAX_NUMNODES];
static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
static bool hugetlb_cma_page(struct page *page, unsigned int order) static bool hugetlb_cma_page(struct page *page, unsigned int order)
{ {
return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page, return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
...@@ -6762,7 +6763,38 @@ static bool cma_reserve_called __initdata; ...@@ -6762,7 +6763,38 @@ static bool cma_reserve_called __initdata;
static int __init cmdline_parse_hugetlb_cma(char *p) static int __init cmdline_parse_hugetlb_cma(char *p)
{ {
hugetlb_cma_size = memparse(p, &p); int nid, count = 0;
unsigned long tmp;
char *s = p;
while (*s) {
if (sscanf(s, "%lu%n", &tmp, &count) != 1)
break;
if (s[count] == ':') {
nid = tmp;
if (nid < 0 || nid >= MAX_NUMNODES)
break;
s += count + 1;
tmp = memparse(s, &s);
hugetlb_cma_size_in_node[nid] = tmp;
hugetlb_cma_size += tmp;
/*
* Skip the separator if have one, otherwise
* break the parsing.
*/
if (*s == ',')
s++;
else
break;
} else {
hugetlb_cma_size = memparse(p, &p);
break;
}
}
return 0; return 0;
} }
...@@ -6771,10 +6803,36 @@ early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); ...@@ -6771,10 +6803,36 @@ early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
void __init hugetlb_cma_reserve(int order) void __init hugetlb_cma_reserve(int order)
{ {
unsigned long size, reserved, per_node; unsigned long size, reserved, per_node;
bool node_specific_cma_alloc = false;
int nid; int nid;
cma_reserve_called = true; cma_reserve_called = true;
if (!hugetlb_cma_size)
return;
for (nid = 0; nid < MAX_NUMNODES; nid++) {
if (hugetlb_cma_size_in_node[nid] == 0)
continue;
if (!node_state(nid, N_ONLINE)) {
pr_warn("hugetlb_cma: invalid node %d specified\n", nid);
hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
hugetlb_cma_size_in_node[nid] = 0;
continue;
}
if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) {
pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n",
nid, (PAGE_SIZE << order) / SZ_1M);
hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
hugetlb_cma_size_in_node[nid] = 0;
} else {
node_specific_cma_alloc = true;
}
}
/* Validate the CMA size again in case some invalid nodes specified. */
if (!hugetlb_cma_size) if (!hugetlb_cma_size)
return; return;
...@@ -6785,20 +6843,30 @@ void __init hugetlb_cma_reserve(int order) ...@@ -6785,20 +6843,30 @@ void __init hugetlb_cma_reserve(int order)
return; return;
} }
/* if (!node_specific_cma_alloc) {
* If 3 GB area is requested on a machine with 4 numa nodes, /*
* let's allocate 1 GB on first three nodes and ignore the last one. * If 3 GB area is requested on a machine with 4 numa nodes,
*/ * let's allocate 1 GB on first three nodes and ignore the last one.
per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); */
pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
hugetlb_cma_size / SZ_1M, per_node / SZ_1M); pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
}
reserved = 0; reserved = 0;
for_each_node_state(nid, N_ONLINE) { for_each_node_state(nid, N_ONLINE) {
int res; int res;
char name[CMA_MAX_NAME]; char name[CMA_MAX_NAME];
size = min(per_node, hugetlb_cma_size - reserved); if (node_specific_cma_alloc) {
if (hugetlb_cma_size_in_node[nid] == 0)
continue;
size = hugetlb_cma_size_in_node[nid];
} else {
size = min(per_node, hugetlb_cma_size - reserved);
}
size = round_up(size, PAGE_SIZE << order); size = round_up(size, PAGE_SIZE << order);
snprintf(name, sizeof(name), "hugetlb%d", nid); snprintf(name, sizeof(name), "hugetlb%d", nid);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment