Commit dbcf929c authored by David Gibson's avatar David Gibson Committed by Michael Ellerman

powerpc/pseries: Add support for hash table resizing

This adds support for using two hypercalls to change the size of the
main hash page table while running as a PAPR guest. For now these
hypercalls are only in experimental qemu versions.

The interface is two part: first H_RESIZE_HPT_PREPARE is used to
allocate and prepare the new hash table. This may be slow, but can be
done asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the
new hash table. This requires that no CPUs be concurrently updating the
HPT, and so must be run under stop_machine().

This also adds a debugfs file which can be used to manually control
HPT resizing or testing purposes.
Signed-off-by: default avatarDavid Gibson <david@gibson.dropbear.id.au>
Reviewed-by: default avatarPaul Mackerras <paulus@samba.org>
[mpe: Rename the debugfs file to "hpt_order"]
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent 64b40ffb
......@@ -157,6 +157,7 @@ struct mmu_hash_ops {
unsigned long addr,
unsigned char *hpte_slot_array,
int psize, int ssize, int local);
int (*resize_hpt)(unsigned long shift);
/*
* Special for kexec.
* To be called in real mode with interrupts disabled. No locks are
......
......@@ -35,7 +35,9 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
#include <linux/debugfs.h>
#include <asm/debug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
......@@ -1795,3 +1797,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
}
#ifdef CONFIG_DEBUG_FS
static int hpt_order_get(void *data, u64 *val)
{
*val = ppc64_pft_size;
return 0;
}
static int hpt_order_set(void *data, u64 val)
{
if (!mmu_hash_ops.resize_hpt)
return -ENODEV;
return mmu_hash_ops.resize_hpt(val);
}
DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
static int __init hash64_debugfs(void)
{
if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
NULL, &fops_hpt_order)) {
pr_err("lpar: unable to create hpt_order debugsfs file\n");
}
return 0;
}
machine_device_initcall(pseries, hash64_debugfs);
#endif /* CONFIG_DEBUG_FS */
......@@ -27,6 +27,8 @@
#include <linux/console.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
......@@ -609,6 +611,112 @@ static int __init disable_bulk_remove(char *str)
__setup("bulk_remove=", disable_bulk_remove);
#define HPT_RESIZE_TIMEOUT 10000 /* ms */
struct hpt_resize_state {
unsigned long shift;
int commit_rc;
};
static int pseries_lpar_resize_hpt_commit(void *data)
{
struct hpt_resize_state *state = data;
state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
if (state->commit_rc != H_SUCCESS)
return -EIO;
/* Hypervisor has transitioned the HTAB, update our globals */
ppc64_pft_size = state->shift;
htab_size_bytes = 1UL << ppc64_pft_size;
htab_hash_mask = (htab_size_bytes >> 7) - 1;
return 0;
}
/* Must be called in user context */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
struct hpt_resize_state state = {
.shift = shift,
.commit_rc = H_FUNCTION,
};
unsigned int delay, total_delay = 0;
int rc;
ktime_t t0, t1, t2;
might_sleep();
if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
return -ENODEV;
printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
shift);
t0 = ktime_get();
rc = plpar_resize_hpt_prepare(0, shift);
while (H_IS_LONG_BUSY(rc)) {
delay = get_longbusy_msecs(rc);
total_delay += delay;
if (total_delay > HPT_RESIZE_TIMEOUT) {
/* prepare with shift==0 cancels an in-progress resize */
rc = plpar_resize_hpt_prepare(0, 0);
if (rc != H_SUCCESS)
printk(KERN_WARNING
"lpar: Unexpected error %d cancelling timed out HPT resize\n",
rc);
return -ETIMEDOUT;
}
msleep(delay);
rc = plpar_resize_hpt_prepare(0, shift);
};
switch (rc) {
case H_SUCCESS:
/* Continue on */
break;
case H_PARAMETER:
return -EINVAL;
case H_RESOURCE:
return -EPERM;
default:
printk(KERN_WARNING
"lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
rc);
return -EIO;
}
t1 = ktime_get();
rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
t2 = ktime_get();
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
printk(KERN_WARNING
"lpar: Hash collision while resizing HPT\n");
return -ENOSPC;
default:
printk(KERN_WARNING
"lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
state.commit_rc);
return -EIO;
};
}
printk(KERN_INFO
"lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
shift, (long long) ktime_ms_delta(t1, t0),
(long long) ktime_ms_delta(t2, t1));
return 0;
}
void __init hpte_init_pseries(void)
{
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
......@@ -620,6 +728,7 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
}
#ifdef CONFIG_PPC_SMLPAR
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment