Commit 23dcfa61 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (Andrew's patch-bomb)

Merge fixes from Andrew Morton.

Random drivers and some VM fixes.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (17 commits)
  mm: compaction: Abort async compaction if locks are contended or taking too long
  mm: have order > 0 compaction start near a pageblock with free pages
  rapidio/tsi721: fix unused variable compiler warning
  rapidio/tsi721: fix inbound doorbell interrupt handling
  drivers/rtc/rtc-rs5c348.c: fix hour decoding in 12-hour mode
  mm: correct page->pfmemalloc to fix deactivate_slab regression
  drivers/rtc/rtc-pcf2123.c: initialize dynamic sysfs attributes
  mm/compaction.c: fix deferring compaction mistake
  drivers/misc/sgi-xp/xpc_uv.c: SGI XPC fails to load when cpu 0 is out of IRQ resources
  string: do not export memweight() to userspace
  hugetlb: update hugetlbpage.txt
  checkpatch: add control statement test to SINGLE_STATEMENT_DO_WHILE_MACRO
  mm: hugetlbfs: correctly populate shared pmd
  cciss: fix incorrect scsi status reporting
  Documentation: update mount option in filesystem/vfat.txt
  mm: change nr_ptes BUG_ON to WARN_ON
  cs5535-clockevt: typo, it's MFGPT, not MFPGT
parents a484147a c67fe375
...@@ -137,6 +137,17 @@ errors=panic|continue|remount-ro ...@@ -137,6 +137,17 @@ errors=panic|continue|remount-ro
without doing anything or remount the partition in without doing anything or remount the partition in
read-only mode (default behavior). read-only mode (default behavior).
discard -- If set, issues discard/TRIM commands to the block
device when blocks are freed. This is useful for SSD devices
and sparse/thinly-provisioned LUNs.
nfs -- This option maintains an index (cache) of directory
inodes by i_logstart which is used by the nfs-related code to
improve look-ups.
Enable this only if you want to export the FAT filesystem
over NFS.
<bool>: 0,1,yes,no,true,false <bool>: 0,1,yes,no,true,false
TODO TODO
......
...@@ -299,11 +299,17 @@ map_hugetlb.c. ...@@ -299,11 +299,17 @@ map_hugetlb.c.
******************************************************************* *******************************************************************
/* /*
* hugepage-shm: see Documentation/vm/hugepage-shm.c * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
*/ */
******************************************************************* *******************************************************************
/* /*
* hugepage-mmap: see Documentation/vm/hugepage-mmap.c * hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c
*/
*******************************************************************
/*
* hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c
*/ */
...@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) ...@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
} }
/* /*
* search for a shareable pmd page for hugetlb. * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
* pud has to be populated inside the same i_mmap_mutex section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/ */
static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) static pte_t *
huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{ {
struct vm_area_struct *vma = find_vma(mm, addr); struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping; struct address_space *mapping = vma->vm_file->f_mapping;
...@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) ...@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct vm_area_struct *svma; struct vm_area_struct *svma;
unsigned long saddr; unsigned long saddr;
pte_t *spte = NULL; pte_t *spte = NULL;
pte_t *pte;
if (!vma_shareable(vma, addr)) if (!vma_shareable(vma, addr))
return; return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex); mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
...@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) ...@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
put_page(virt_to_page(spte)); put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
out: out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex); mutex_unlock(&mapping->i_mmap_mutex);
return pte;
} }
/* /*
...@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
} else { } else {
BUG_ON(sz != PMD_SIZE); BUG_ON(sz != PMD_SIZE);
if (pud_none(*pud)) if (pud_none(*pud))
huge_pmd_share(mm, addr, pud); pte = huge_pmd_share(mm, addr, pud);
pte = (pte_t *) pmd_alloc(mm, pud, addr); else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
} }
} }
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
......
...@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, ...@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
{ {
case CMD_TARGET_STATUS: case CMD_TARGET_STATUS:
/* Pass it up to the upper layers... */ /* Pass it up to the upper layers... */
if( ei->ScsiStatus) if (!ei->ScsiStatus) {
{
#if 0
printk(KERN_WARNING "cciss: cmd %p "
"has SCSI Status = %x\n",
c, ei->ScsiStatus);
#endif
cmd->result |= (ei->ScsiStatus << 1);
}
else { /* scsi status is zero??? How??? */
/* Ordinarily, this case should never happen, but there is a bug /* Ordinarily, this case should never happen, but there is a bug
in some released firmware revisions that allows it to happen in some released firmware revisions that allows it to happen
......
...@@ -53,7 +53,7 @@ static struct cs5535_mfgpt_timer *cs5535_event_clock; ...@@ -53,7 +53,7 @@ static struct cs5535_mfgpt_timer *cs5535_event_clock;
#define MFGPT_PERIODIC (MFGPT_HZ / HZ) #define MFGPT_PERIODIC (MFGPT_HZ / HZ)
/* /*
* The MFPGT timers on the CS5536 provide us with suitable timers to use * The MFGPT timers on the CS5536 provide us with suitable timers to use
* as clock event sources - not as good as a HPET or APIC, but certainly * as clock event sources - not as good as a HPET or APIC, but certainly
* better than the PIT. This isn't a general purpose MFGPT driver, but * better than the PIT. This isn't a general purpose MFGPT driver, but
* a simplified one designed specifically to act as a clock event source. * a simplified one designed specifically to act as a clock event source.
...@@ -144,7 +144,7 @@ static int __init cs5535_mfgpt_init(void) ...@@ -144,7 +144,7 @@ static int __init cs5535_mfgpt_init(void)
timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
if (!timer) { if (!timer) {
printk(KERN_ERR DRV_NAME ": Could not allocate MFPGT timer\n"); printk(KERN_ERR DRV_NAME ": Could not allocate MFGPT timer\n");
return -ENODEV; return -ENODEV;
} }
cs5535_event_clock = timer; cs5535_event_clock = timer;
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
...@@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; ...@@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
XPC_NOTIFY_MSG_SIZE_UV) XPC_NOTIFY_MSG_SIZE_UV)
#define XPC_NOTIFY_IRQ_NAME "xpc_notify" #define XPC_NOTIFY_IRQ_NAME "xpc_notify"
static int xpc_mq_node = -1;
static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
static struct xpc_gru_mq_uv *xpc_notify_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
...@@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) ...@@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
#if defined CONFIG_X86_64 #if defined CONFIG_X86_64
mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
UV_AFFINITY_CPU); UV_AFFINITY_CPU);
if (mq->irq < 0) { if (mq->irq < 0)
dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-mq->irq);
return mq->irq; return mq->irq;
}
mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
...@@ -238,7 +239,8 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, ...@@ -238,7 +239,8 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
mq->mmr_blade = uv_cpu_to_blade_id(cpu); mq->mmr_blade = uv_cpu_to_blade_id(cpu);
nid = cpu_to_node(cpu); nid = cpu_to_node(cpu);
page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, page = alloc_pages_exact_node(nid,
GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
pg_order); pg_order);
if (page == NULL) { if (page == NULL) {
dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
...@@ -1731,37 +1733,81 @@ static struct xpc_arch_operations xpc_arch_ops_uv = { ...@@ -1731,37 +1733,81 @@ static struct xpc_arch_operations xpc_arch_ops_uv = {
.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
}; };
int static int
xpc_init_uv(void) xpc_init_mq_node(int nid)
{ {
xpc_arch_ops = xpc_arch_ops_uv; int cpu;
if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { get_online_cpus();
dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
XPC_MSG_HDR_MAX_SIZE);
return -E2BIG;
}
xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, for_each_cpu(cpu, cpumask_of_node(nid)) {
xpc_activate_mq_uv =
xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
XPC_ACTIVATE_IRQ_NAME, XPC_ACTIVATE_IRQ_NAME,
xpc_handle_activate_IRQ_uv); xpc_handle_activate_IRQ_uv);
if (IS_ERR(xpc_activate_mq_uv)) if (!IS_ERR(xpc_activate_mq_uv))
break;
}
if (IS_ERR(xpc_activate_mq_uv)) {
put_online_cpus();
return PTR_ERR(xpc_activate_mq_uv); return PTR_ERR(xpc_activate_mq_uv);
}
xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, for_each_cpu(cpu, cpumask_of_node(nid)) {
xpc_notify_mq_uv =
xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
XPC_NOTIFY_IRQ_NAME, XPC_NOTIFY_IRQ_NAME,
xpc_handle_notify_IRQ_uv); xpc_handle_notify_IRQ_uv);
if (!IS_ERR(xpc_notify_mq_uv))
break;
}
if (IS_ERR(xpc_notify_mq_uv)) { if (IS_ERR(xpc_notify_mq_uv)) {
xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
put_online_cpus();
return PTR_ERR(xpc_notify_mq_uv); return PTR_ERR(xpc_notify_mq_uv);
} }
put_online_cpus();
return 0; return 0;
} }
/*
 * Initialise the UV flavour of XPC: install the UV arch operations and
 * create the message queues.
 *
 * When the xpc_mq_node module parameter is negative, every online node is
 * tried in turn until xpc_init_mq_node() succeeds; otherwise only the
 * requested node is used.
 *
 * Returns 0 on success, -E2BIG if struct xpc_notify_mq_msghdr_uv is larger
 * than XPC_MSG_HDR_MAX_SIZE, or the error returned by xpc_init_mq_node().
 */
int
xpc_init_uv(void)
{
	int nid;
	int ret = 0;

	xpc_arch_ops = xpc_arch_ops_uv;

	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
			XPC_MSG_HDR_MAX_SIZE);
		return -E2BIG;
	}

	/*
	 * Braced explicitly: for_each_online_node() is a macro, so an
	 * unbraced if/else wrapped around it risks a dangling else should
	 * the macro expand to an if-guarded loop.
	 */
	if (xpc_mq_node < 0) {
		for_each_online_node(nid) {
			ret = xpc_init_mq_node(nid);
			if (!ret)
				break;
		}
	} else {
		ret = xpc_init_mq_node(xpc_mq_node);
	}

	if (ret < 0)
		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
			-ret);
	return ret;
}
void void
xpc_exit_uv(void) xpc_exit_uv(void)
{ {
xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
} }
module_param(xpc_mq_node, int, 0);
MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
...@@ -435,6 +435,9 @@ static void tsi721_db_dpc(struct work_struct *work) ...@@ -435,6 +435,9 @@ static void tsi721_db_dpc(struct work_struct *work)
" info %4.4x\n", DBELL_SID(idb.bytes), " info %4.4x\n", DBELL_SID(idb.bytes),
DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
} }
wr_ptr = ioread32(priv->regs +
TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
} }
iowrite32(rd_ptr & (IDB_QSIZE - 1), iowrite32(rd_ptr & (IDB_QSIZE - 1),
...@@ -445,6 +448,10 @@ static void tsi721_db_dpc(struct work_struct *work) ...@@ -445,6 +448,10 @@ static void tsi721_db_dpc(struct work_struct *work)
regval |= TSI721_SR_CHINT_IDBQRCV; regval |= TSI721_SR_CHINT_IDBQRCV;
iowrite32(regval, iowrite32(regval,
priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
if (wr_ptr != rd_ptr)
schedule_work(&priv->idb_work);
} }
/** /**
...@@ -2212,7 +2219,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, ...@@ -2212,7 +2219,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
const struct pci_device_id *id) const struct pci_device_id *id)
{ {
struct tsi721_device *priv; struct tsi721_device *priv;
int i, cap; int cap;
int err; int err;
u32 regval; u32 regval;
...@@ -2232,12 +2239,15 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, ...@@ -2232,12 +2239,15 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
priv->pdev = pdev; priv->pdev = pdev;
#ifdef DEBUG #ifdef DEBUG
{
int i;
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
i, (unsigned long long)pci_resource_start(pdev, i), i, (unsigned long long)pci_resource_start(pdev, i),
(unsigned long)pci_resource_len(pdev, i), (unsigned long)pci_resource_len(pdev, i),
pci_resource_flags(pdev, i)); pci_resource_flags(pdev, i));
} }
}
#endif #endif
/* /*
* Verify BAR configuration * Verify BAR configuration
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <linux/rtc.h> #include <linux/rtc.h>
#include <linux/spi/spi.h> #include <linux/spi/spi.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sysfs.h>
#define DRV_VERSION "0.6" #define DRV_VERSION "0.6"
...@@ -292,6 +293,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi) ...@@ -292,6 +293,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi)
pdata->rtc = rtc; pdata->rtc = rtc;
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
sysfs_attr_init(&pdata->regs[i].attr.attr);
sprintf(pdata->regs[i].name, "%1x", i); sprintf(pdata->regs[i].name, "%1x", i);
pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR; pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
pdata->regs[i].attr.attr.name = pdata->regs[i].name; pdata->regs[i].attr.attr.name = pdata->regs[i].name;
......
...@@ -122,9 +122,12 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm) ...@@ -122,9 +122,12 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK); tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK); tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
if (!pdata->rtc_24h) { if (!pdata->rtc_24h) {
if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) {
tm->tm_hour -= 20;
tm->tm_hour %= 12; tm->tm_hour %= 12;
if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
tm->tm_hour += 12; tm->tm_hour += 12;
} else
tm->tm_hour %= 12;
} }
tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK); tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK); tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
......
...@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, ...@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
extern int fragmentation_index(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist, extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask, int order, gfp_t gfp_mask, nodemask_t *mask,
bool sync); bool sync, bool *contended);
extern int compact_pgdat(pg_data_t *pgdat, int order); extern int compact_pgdat(pg_data_t *pgdat, int order);
extern unsigned long compaction_suitable(struct zone *zone, int order); extern unsigned long compaction_suitable(struct zone *zone, int order);
...@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order) ...@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
#else #else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask, int order, gfp_t gfp_mask, nodemask_t *nodemask,
bool sync) bool sync, bool *contended)
{ {
return COMPACT_CONTINUE; return COMPACT_CONTINUE;
} }
......
...@@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix) ...@@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
{ {
return strncmp(str, prefix, strlen(prefix)) == 0; return strncmp(str, prefix, strlen(prefix)) == 0;
} }
#endif
extern size_t memweight(const void *ptr, size_t bytes); extern size_t memweight(const void *ptr, size_t bytes);
#endif /* __KERNEL__ */
#endif /* _LINUX_STRING_H_ */ #endif /* _LINUX_STRING_H_ */
...@@ -50,6 +50,47 @@ static inline bool migrate_async_suitable(int migratetype) ...@@ -50,6 +50,47 @@ static inline bool migrate_async_suitable(int migratetype)
return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
} }
/*
 * Decide whether compaction may keep (or take) a coarse, potentially very
 * heavily contended lock.
 *
 * @lock:   spinlock that is held on a true return
 * @flags:  irq flags storage used with the irqsave/irqrestore lock calls
 * @locked: true if the caller currently holds @lock
 * @cc:     compaction control; cc->sync selects sync or async behaviour and
 *          cc->contended, when non-NULL, is set if async compaction backs out
 *
 * If a reschedule is needed or the lock is contended, the lock is dropped.
 * Async compaction then backs out; sync compaction calls cond_resched() and
 * carries on unless a fatal signal is pending.
 *
 * Returns true if the lock is held.
 * Returns false if the lock is released and compaction should abort.
 */
static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
				      bool locked, struct compact_control *cc)
{
	const bool must_yield = need_resched() || spin_is_contended(lock);

	/* Nothing is waiting: make sure the lock is held and continue */
	if (!must_yield) {
		if (!locked)
			spin_lock_irqsave(lock, *flags);
		return true;
	}

	if (locked)
		spin_unlock_irqrestore(lock, *flags);

	/* async aborts if taking too long or contended */
	if (!cc->sync) {
		if (cc->contended)
			*cc->contended = true;
		return false;
	}

	cond_resched();
	if (fatal_signal_pending(current))
		return false;

	/* The lock was dropped (or never held) above, so take it now */
	spin_lock_irqsave(lock, *flags);
	return true;
}
/*
 * Take @lock on behalf of a caller that does not hold it yet, applying the
 * same need_resched/contention checks as compact_checklock_irqsave().
 *
 * Returns whatever compact_checklock_irqsave() returns for an unlocked
 * caller.
 */
static inline bool compact_trylock_irqsave(spinlock_t *lock,
			unsigned long *flags, struct compact_control *cc)
{
	const bool already_held = false;

	return compact_checklock_irqsave(lock, flags, already_held, cc);
}
/* /*
* Isolate free pages onto a private freelist. Caller must hold zone->lock. * Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
...@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) ...@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
} }
/* Update the number of anon and file isolated pages in the zone */ /* Update the number of anon and file isolated pages in the zone */
static void acct_isolated(struct zone *zone, struct compact_control *cc) static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
{ {
struct page *page; struct page *page;
unsigned int count[2] = { 0, }; unsigned int count[2] = { 0, };
...@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc) ...@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
list_for_each_entry(page, &cc->migratepages, lru) list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++; count[!!page_is_file_cache(page)]++;
/* If locked we can use the interrupt unsafe versions */
if (locked) {
__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
} else {
mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
}
} }
/* Similar to reclaim, but different enough that they don't share logic */ /* Similar to reclaim, but different enough that they don't share logic */
...@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, ...@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
struct list_head *migratelist = &cc->migratepages; struct list_head *migratelist = &cc->migratepages;
isolate_mode_t mode = 0; isolate_mode_t mode = 0;
struct lruvec *lruvec; struct lruvec *lruvec;
unsigned long flags;
bool locked;
/* /*
* Ensure that there are not too many pages isolated from the LRU * Ensure that there are not too many pages isolated from the LRU
...@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, ...@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
/* Time to isolate some pages for migration */ /* Time to isolate some pages for migration */
cond_resched(); cond_resched();
spin_lock_irq(&zone->lru_lock); spin_lock_irqsave(&zone->lru_lock, flags);
locked = true;
for (; low_pfn < end_pfn; low_pfn++) { for (; low_pfn < end_pfn; low_pfn++) {
struct page *page; struct page *page;
bool locked = true;
/* give a chance to irqs before checking need_resched() */ /* give a chance to irqs before checking need_resched() */
if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
spin_unlock_irq(&zone->lru_lock); spin_unlock_irqrestore(&zone->lru_lock, flags);
locked = false; locked = false;
} }
if (need_resched() || spin_is_contended(&zone->lru_lock)) {
if (locked) /* Check if it is ok to still hold the lock */
spin_unlock_irq(&zone->lru_lock); locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
cond_resched(); locked, cc);
spin_lock_irq(&zone->lru_lock); if (!locked)
if (fatal_signal_pending(current))
break; break;
} else if (!locked)
spin_lock_irq(&zone->lru_lock);
/* /*
* migrate_pfn does not necessarily start aligned to a * migrate_pfn does not necessarily start aligned to a
...@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, ...@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
} }
} }
acct_isolated(zone, cc); acct_isolated(zone, locked, cc);
spin_unlock_irq(&zone->lru_lock); if (locked)
spin_unlock_irqrestore(&zone->lru_lock, flags);
trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
...@@ -383,6 +430,20 @@ static bool suitable_migration_target(struct page *page) ...@@ -383,6 +430,20 @@ static bool suitable_migration_target(struct page *page)
return false; return false;
} }
/*
 * Starting point of the free-page scanner for a full compaction of @zone.
 *
 * Takes the zone's end pfn (zone_start_pfn + spanned_pages) and rounds it
 * down to a pageblock boundary.  Compaction searches for free pages from the
 * end of each zone, while isolate_freepages_block scans forward inside each
 * page block.
 */
static unsigned long start_free_pfn(struct zone *zone)
{
	unsigned long zone_end = zone->zone_start_pfn + zone->spanned_pages;

	return zone_end & ~(pageblock_nr_pages - 1);
}
/* /*
* Based on information in the current compact_control, find blocks * Based on information in the current compact_control, find blocks
* suitable for isolating free pages from and then isolate them. * suitable for isolating free pages from and then isolate them.
...@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone, ...@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
pfn -= pageblock_nr_pages) { pfn -= pageblock_nr_pages) {
unsigned long isolated; unsigned long isolated;
/*
* Skip ahead if another thread is compacting in the area
* simultaneously. If we wrapped around, we can only skip
* ahead if zone->compact_cached_free_pfn also wrapped to
* above our starting point.
*/
if (cc->order > 0 && (!cc->wrapped ||
zone->compact_cached_free_pfn >
cc->start_free_pfn))
pfn = min(pfn, zone->compact_cached_free_pfn);
if (!pfn_valid(pfn)) if (!pfn_valid(pfn))
continue; continue;
...@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone, ...@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
* are disabled * are disabled
*/ */
isolated = 0; isolated = 0;
spin_lock_irqsave(&zone->lock, flags);
/*
* The zone lock must be held to isolate freepages.
* Unfortunately this is a very coarse lock and can be
* heavily contended if there are parallel allocations
* or parallel compactions. For async compaction do not
* spin on the lock
*/
if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
break;
if (suitable_migration_target(page)) { if (suitable_migration_target(page)) {
end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
isolated = isolate_freepages_block(pfn, end_pfn, isolated = isolate_freepages_block(pfn, end_pfn,
...@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone, ...@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
*/ */
if (isolated) { if (isolated) {
high_pfn = max(high_pfn, pfn); high_pfn = max(high_pfn, pfn);
if (cc->order > 0)
/*
* If the free scanner has wrapped, update
* compact_cached_free_pfn to point to the highest
* pageblock with free pages. This reduces excessive
* scanning of full pageblocks near the end of the
* zone
*/
if (cc->order > 0 && cc->wrapped)
zone->compact_cached_free_pfn = high_pfn; zone->compact_cached_free_pfn = high_pfn;
} }
} }
...@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone, ...@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
cc->free_pfn = high_pfn; cc->free_pfn = high_pfn;
cc->nr_freepages = nr_freepages; cc->nr_freepages = nr_freepages;
/* If compact_cached_free_pfn is reset then set it now */
if (cc->order > 0 && !cc->wrapped &&
zone->compact_cached_free_pfn == start_free_pfn(zone))
zone->compact_cached_free_pfn = high_pfn;
} }
/* /*
...@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, ...@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
return ISOLATE_SUCCESS; return ISOLATE_SUCCESS;
} }
/*
* Returns the start pfn of the last page block in a zone. This is the starting
* point for full compaction of a zone. Compaction searches for free pages from
* the end of each zone, while isolate_freepages_block scans forward inside each
* page block.
*/
static unsigned long start_free_pfn(struct zone *zone)
{
unsigned long free_pfn;
free_pfn = zone->zone_start_pfn + zone->spanned_pages;
free_pfn &= ~(pageblock_nr_pages-1);
return free_pfn;
}
static int compact_finished(struct zone *zone, static int compact_finished(struct zone *zone,
struct compact_control *cc) struct compact_control *cc)
{ {
...@@ -771,7 +829,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) ...@@ -771,7 +829,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
static unsigned long compact_zone_order(struct zone *zone, static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask, int order, gfp_t gfp_mask,
bool sync) bool sync, bool *contended)
{ {
struct compact_control cc = { struct compact_control cc = {
.nr_freepages = 0, .nr_freepages = 0,
...@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone, ...@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask), .migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone, .zone = zone,
.sync = sync, .sync = sync,
.contended = contended,
}; };
INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages); INIT_LIST_HEAD(&cc.migratepages);
...@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500; ...@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
*/ */
unsigned long try_to_compact_pages(struct zonelist *zonelist, unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask, int order, gfp_t gfp_mask, nodemask_t *nodemask,
bool sync) bool sync, bool *contended)
{ {
enum zone_type high_zoneidx = gfp_zone(gfp_mask); enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS; int may_enter_fs = gfp_mask & __GFP_FS;
...@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, ...@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) { nodemask) {
int status; int status;
status = compact_zone_order(zone, order, gfp_mask, sync); status = compact_zone_order(zone, order, gfp_mask, sync,
contended);
rc = max(status, rc); rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */ /* If a normal allocation would succeed, stop compacting */
...@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) ...@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (cc->order > 0) { if (cc->order > 0) {
int ok = zone_watermark_ok(zone, cc->order, int ok = zone_watermark_ok(zone, cc->order,
low_wmark_pages(zone), 0, 0); low_wmark_pages(zone), 0, 0);
if (ok && cc->order > zone->compact_order_failed) if (ok && cc->order >= zone->compact_order_failed)
zone->compact_order_failed = cc->order + 1; zone->compact_order_failed = cc->order + 1;
/* Currently async compaction is never deferred. */ /* Currently async compaction is never deferred. */
else if (!ok && cc->sync) else if (!ok && cc->sync)
......
...@@ -130,6 +130,7 @@ struct compact_control { ...@@ -130,6 +130,7 @@ struct compact_control {
int order; /* order a direct compactor needs */ int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */ int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone; struct zone *zone;
bool *contended; /* True if a lock was contended */
}; };
unsigned long unsigned long
......
...@@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm) ...@@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm)
} }
vm_unacct_memory(nr_accounted); vm_unacct_memory(nr_accounted);
BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
} }
/* Insert vm structure into process list sorted by address /* Insert vm structure into process list sorted by address
......
...@@ -1928,6 +1928,17 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, ...@@ -1928,6 +1928,17 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
zlc_active = 0; zlc_active = 0;
goto zonelist_scan; goto zonelist_scan;
} }
if (page)
/*
* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
* necessary to allocate the page. The expectation is
* that the caller is taking steps that will free more
* memory. The caller should avoid the page being used
* for !PFMEMALLOC purposes.
*/
page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
return page; return page;
} }
...@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, ...@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx, struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration, int migratetype, bool sync_migration,
bool *deferred_compaction, bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress) unsigned long *did_some_progress)
{ {
struct page *page; struct page *page;
...@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, ...@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC; current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, sync_migration); nodemask, sync_migration,
contended_compaction);
current->flags &= ~PF_MEMALLOC; current->flags &= ~PF_MEMALLOC;
if (*did_some_progress != COMPACT_SKIPPED) { if (*did_some_progress != COMPACT_SKIPPED) {
...@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, ...@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx, struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration, int migratetype, bool sync_migration,
bool *deferred_compaction, bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress) unsigned long *did_some_progress)
{ {
return NULL; return NULL;
...@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress; unsigned long did_some_progress;
bool sync_migration = false; bool sync_migration = false;
bool deferred_compaction = false; bool deferred_compaction = false;
bool contended_compaction = false;
/* /*
* In the slowpath, we sanity check order to avoid ever trying to * In the slowpath, we sanity check order to avoid ever trying to
...@@ -2389,14 +2402,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2389,14 +2402,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
zonelist, high_zoneidx, nodemask, zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype); preferred_zone, migratetype);
if (page) { if (page) {
/*
* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
* necessary to allocate the page. The expectation is
* that the caller is taking steps that will free more
* memory. The caller should avoid the page being used
* for !PFMEMALLOC purposes.
*/
page->pfmemalloc = true;
goto got_pg; goto got_pg;
} }
} }
...@@ -2422,6 +2427,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2422,6 +2427,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
nodemask, nodemask,
alloc_flags, preferred_zone, alloc_flags, preferred_zone,
migratetype, sync_migration, migratetype, sync_migration,
&contended_compaction,
&deferred_compaction, &deferred_compaction,
&did_some_progress); &did_some_progress);
if (page) if (page)
...@@ -2431,10 +2437,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2431,10 +2437,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
/* /*
* If compaction is deferred for high-order allocations, it is because * If compaction is deferred for high-order allocations, it is because
* sync compaction recently failed. If this is the case and the caller * sync compaction recently failed. If this is the case and the caller
* has requested the system not be heavily disrupted, fail the * requested a movable allocation that does not heavily disrupt the
* allocation now instead of entering direct reclaim * system then fail the allocation instead of entering direct reclaim.
*/ */
if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) if ((deferred_compaction || contended_compaction) &&
(gfp_mask & __GFP_NO_KSWAPD))
goto nopage; goto nopage;
/* Try direct reclaim and then allocating */ /* Try direct reclaim and then allocating */
...@@ -2505,6 +2512,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2505,6 +2512,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
nodemask, nodemask,
alloc_flags, preferred_zone, alloc_flags, preferred_zone,
migratetype, sync_migration, migratetype, sync_migration,
&contended_compaction,
&deferred_compaction, &deferred_compaction,
&did_some_progress); &did_some_progress);
if (page) if (page)
...@@ -2569,8 +2577,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ...@@ -2569,8 +2577,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
page = __alloc_pages_slowpath(gfp_mask, order, page = __alloc_pages_slowpath(gfp_mask, order,
zonelist, high_zoneidx, nodemask, zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype); preferred_zone, migratetype);
else
page->pfmemalloc = false;
trace_mm_page_alloc(page, order, gfp_mask, migratetype); trace_mm_page_alloc(page, order, gfp_mask, migratetype);
......
...@@ -3016,7 +3016,8 @@ sub process { ...@@ -3016,7 +3016,8 @@ sub process {
$herectx .= raw_line($linenr, $n) . "\n"; $herectx .= raw_line($linenr, $n) . "\n";
} }
if (($stmts =~ tr/;/;/) == 1) { if (($stmts =~ tr/;/;/) == 1 &&
$stmts !~ /^\s*(if|while|for|switch)\b/) {
WARN("SINGLE_STATEMENT_DO_WHILE_MACRO", WARN("SINGLE_STATEMENT_DO_WHILE_MACRO",
"Single statement macros should not use a do {} while (0) loop\n" . "$herectx"); "Single statement macros should not use a do {} while (0) loop\n" . "$herectx");
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment