Commit 3f803abf authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew Morton)

Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness
  mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS
  MAINTAINERS: add and correct types of some "T:" entries
  MAINTAINERS: use tab for separator
  rapidio/tsi721: fix tasklet termination in dma channel release
  hfsplus: fix remount issue
  zram: avoid null access when fail to alloc meta
  sh: prefix sh-specific "CCR" and "CCR2" by "SH_"
  ocfs2: fix quota file corruption
  drivers/rtc/rtc-s3c.c: fix incorrect way of save/restore of S3C2410_TICNT for TYPE_S3C64XX
  kallsyms: fix absolute addresses for kASLR
  scripts/gen_initramfs_list.sh: fix flags for initramfs LZ4 compression
  mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking
  memcg: reparent charges of children before processing parent
  memcg: fix endless loop in __mem_cgroup_iter_next()
  lib/radix-tree.c: swapoff tmpfs radix_tree: remember to rcu_read_unlock
  dma debug: account for cachelines and read-only mappings in overlap tracking
  mm: close PageTail race
  MAINTAINERS: EDAC: add Mauro and Borislav as interim patch collectors
parents 0c0bd34a 27329369
......@@ -73,7 +73,8 @@ Descriptions of section entries:
L: Mailing list that is relevant to this area
W: Web-page with status/info
Q: Patchwork web based patch tracking system site
T: SCM tree type and location. Type is one of: git, hg, quilt, stgit, topgit.
T: SCM tree type and location.
Type is one of: git, hg, quilt, stgit, topgit
S: Status, one of the following:
Supported: Someone is actually paid to look after this.
Maintained: Someone actually looks after it.
......@@ -2159,7 +2160,7 @@ F: Documentation/zh_CN/
CHIPIDEA USB HIGH SPEED DUAL ROLE CONTROLLER
M: Peter Chen <Peter.Chen@freescale.com>
T: git://github.com/hzpeterchen/linux-usb.git
T: git git://github.com/hzpeterchen/linux-usb.git
L: linux-usb@vger.kernel.org
S: Maintained
F: drivers/usb/chipidea/
......@@ -2382,7 +2383,7 @@ M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
L: linux-pm@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org
T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
S: Maintained
F: drivers/cpuidle/cpuidle-big_little.c
......@@ -2391,7 +2392,7 @@ M: Rafael J. Wysocki <rjw@rjwysocki.net>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
L: linux-pm@vger.kernel.org
S: Maintained
T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
F: drivers/cpuidle/*
F: include/linux/cpuidle.h
......@@ -3095,6 +3096,8 @@ F: fs/ecryptfs/
EDAC-CORE
M: Doug Thompson <dougthompson@xmission.com>
M: Borislav Petkov <bp@alien8.de>
M: Mauro Carvalho Chehab <m.chehab@samsung.com>
L: linux-edac@vger.kernel.org
W: bluesmoke.sourceforge.net
S: Supported
......@@ -4914,7 +4917,7 @@ F: drivers/staging/ktap/
KCONFIG
M: "Yann E. MORIN" <yann.morin.1998@free.fr>
L: linux-kbuild@vger.kernel.org
T: git://gitorious.org/linux-kconfig/linux-kconfig
T: git git://gitorious.org/linux-kconfig/linux-kconfig
S: Maintained
F: Documentation/kbuild/kconfig-language.txt
F: scripts/kconfig/
......@@ -5733,7 +5736,7 @@ L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git://openfabrics.org/~eli/connect-ib.git
T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
......@@ -5743,7 +5746,7 @@ M: Eli Cohen <eli@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git://openfabrics.org/~eli/connect-ib.git
T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
F: include/linux/mlx5/
F: drivers/infiniband/hw/mlx5/
......@@ -9812,7 +9815,7 @@ ZR36067 VIDEO FOR LINUX DRIVER
L: mjpeg-users@lists.sourceforge.net
L: linux-media@vger.kernel.org
W: http://mjpeg.sourceforge.net/driver-zoran/
T: Mercurial http://linuxtv.org/hg/v4l-dvb
T: hg http://linuxtv.org/hg/v4l-dvb
S: Odd Fixes
F: drivers/media/pci/zoran/
......
......@@ -18,7 +18,7 @@
#define SH_CACHE_ASSOC 8
#if defined(CONFIG_CPU_SUBTYPE_SH7619)
#define CCR 0xffffffec
#define SH_CCR 0xffffffec
#define CCR_CACHE_CE 0x01 /* Cache enable */
#define CCR_CACHE_WT 0x02 /* CCR[bit1=1,bit2=1] */
......
......@@ -17,8 +17,8 @@
#define SH_CACHE_COMBINED 4
#define SH_CACHE_ASSOC 8
#define CCR 0xfffc1000 /* CCR1 */
#define CCR2 0xfffc1004
#define SH_CCR 0xfffc1000 /* CCR1 */
#define SH_CCR2 0xfffc1004
/*
* Most of the SH-2A CCR1 definitions resemble the SH-4 ones. All others not
......
......@@ -17,7 +17,7 @@
#define SH_CACHE_COMBINED 4
#define SH_CACHE_ASSOC 8
#define CCR 0xffffffec /* Address of Cache Control Register */
#define SH_CCR 0xffffffec /* Address of Cache Control Register */
#define CCR_CACHE_CE 0x01 /* Cache Enable */
#define CCR_CACHE_WT 0x02 /* Write-Through (for P0,U0,P3) (else writeback) */
......
......@@ -17,7 +17,7 @@
#define SH_CACHE_COMBINED 4
#define SH_CACHE_ASSOC 8
#define CCR 0xff00001c /* Address of Cache Control Register */
#define SH_CCR 0xff00001c /* Address of Cache Control Register */
#define CCR_CACHE_OCE 0x0001 /* Operand Cache Enable */
#define CCR_CACHE_WT 0x0002 /* Write-Through (for P0,U0,P3) (else writeback)*/
#define CCR_CACHE_CB 0x0004 /* Copy-Back (for P1) (else writethrough) */
......
......@@ -112,7 +112,7 @@ static void cache_init(void)
unsigned long ccr, flags;
jump_to_uncached();
ccr = __raw_readl(CCR);
ccr = __raw_readl(SH_CCR);
/*
* At this point we don't know whether the cache is enabled or not - a
......@@ -189,7 +189,7 @@ static void cache_init(void)
l2_cache_init();
__raw_writel(flags, CCR);
__raw_writel(flags, SH_CCR);
back_to_cached();
}
#else
......
......@@ -36,7 +36,7 @@ static int cache_seq_show(struct seq_file *file, void *iter)
*/
jump_to_uncached();
ccr = __raw_readl(CCR);
ccr = __raw_readl(SH_CCR);
if ((ccr & CCR_CACHE_ENABLE) == 0) {
back_to_cached();
......
......@@ -63,9 +63,9 @@ static void sh2__flush_invalidate_region(void *start, int size)
local_irq_save(flags);
jump_to_uncached();
ccr = __raw_readl(CCR);
ccr = __raw_readl(SH_CCR);
ccr |= CCR_CACHE_INVALIDATE;
__raw_writel(ccr, CCR);
__raw_writel(ccr, SH_CCR);
back_to_cached();
local_irq_restore(flags);
......
......@@ -134,7 +134,8 @@ static void sh2a__flush_invalidate_region(void *start, int size)
/* If there are too many pages then just blow the cache */
if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) {
__raw_writel(__raw_readl(CCR) | CCR_OCACHE_INVALIDATE, CCR);
__raw_writel(__raw_readl(SH_CCR) | CCR_OCACHE_INVALIDATE,
SH_CCR);
} else {
for (v = begin; v < end; v += L1_CACHE_BYTES)
sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v);
......@@ -167,7 +168,8 @@ static void sh2a_flush_icache_range(void *args)
/* I-Cache invalidate */
/* If there are too many pages then just blow the cache */
if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
__raw_writel(__raw_readl(CCR) | CCR_ICACHE_INVALIDATE, CCR);
__raw_writel(__raw_readl(SH_CCR) | CCR_ICACHE_INVALIDATE,
SH_CCR);
} else {
for (v = start; v < end; v += L1_CACHE_BYTES)
sh2a_invalidate_line(CACHE_IC_ADDRESS_ARRAY, v);
......
......@@ -133,9 +133,9 @@ static void flush_icache_all(void)
jump_to_uncached();
/* Flush I-cache */
ccr = __raw_readl(CCR);
ccr = __raw_readl(SH_CCR);
ccr |= CCR_CACHE_ICI;
__raw_writel(ccr, CCR);
__raw_writel(ccr, SH_CCR);
/*
* back_to_cached() will take care of the barrier for us, don't add
......
......@@ -19,7 +19,7 @@ void __init shx3_cache_init(void)
{
unsigned int ccr;
ccr = __raw_readl(CCR);
ccr = __raw_readl(SH_CCR);
/*
* If we've got cache aliases, resolve them in hardware.
......@@ -40,5 +40,5 @@ void __init shx3_cache_init(void)
ccr |= CCR_CACHE_IBE;
#endif
writel_uncached(ccr, CCR);
writel_uncached(ccr, SH_CCR);
}
......@@ -285,8 +285,8 @@ void __init cpu_cache_init(void)
{
unsigned int cache_disabled = 0;
#ifdef CCR
cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
#ifdef SH_CCR
cache_disabled = !(__raw_readl(SH_CCR) & CCR_CACHE_ENABLE);
#endif
compute_alias(&boot_cpu_data.icache);
......
......@@ -874,7 +874,7 @@ bio_pageinc(struct bio *bio)
/* Non-zero page count for non-head members of
* compound pages is no longer allowed by the kernel.
*/
page = compound_trans_head(bv.bv_page);
page = compound_head(bv.bv_page);
atomic_inc(&page->_count);
}
}
......@@ -887,7 +887,7 @@ bio_pagedec(struct bio *bio)
struct bvec_iter iter;
bio_for_each_segment(bv, bio, iter) {
page = compound_trans_head(bv.bv_page);
page = compound_head(bv.bv_page);
atomic_dec(&page->_count);
}
}
......
......@@ -612,6 +612,8 @@ static ssize_t disksize_store(struct device *dev,
disksize = PAGE_ALIGN(disksize);
meta = zram_meta_alloc(disksize);
if (!meta)
return -ENOMEM;
down_write(&zram->init_lock);
if (zram->init_done) {
up_write(&zram->init_lock);
......
......@@ -678,6 +678,7 @@ struct tsi721_bdma_chan {
struct list_head free_list;
dma_cookie_t completed_cookie;
struct tasklet_struct tasklet;
bool active;
};
#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
......
......@@ -206,7 +206,7 @@ void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan)
{
/* Disable BDMA channel interrupts */
iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE);
if (bdma_chan->active)
tasklet_schedule(&bdma_chan->tasklet);
}
......@@ -562,7 +562,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
}
#endif /* CONFIG_PCI_MSI */
tasklet_enable(&bdma_chan->tasklet);
bdma_chan->active = true;
tsi721_bdma_interrupt_enable(bdma_chan, 1);
return bdma_chan->bd_num - 1;
......@@ -576,9 +576,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
static void tsi721_free_chan_resources(struct dma_chan *dchan)
{
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
#ifdef CONFIG_PCI_MSI
struct tsi721_device *priv = to_tsi721(dchan->device);
#endif
LIST_HEAD(list);
dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
......@@ -589,14 +587,25 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan)
BUG_ON(!list_empty(&bdma_chan->active_list));
BUG_ON(!list_empty(&bdma_chan->queue));
tasklet_disable(&bdma_chan->tasklet);
tsi721_bdma_interrupt_enable(bdma_chan, 0);
bdma_chan->active = false;
#ifdef CONFIG_PCI_MSI
if (priv->flags & TSI721_USING_MSIX) {
synchronize_irq(priv->msix[TSI721_VECT_DMA0_DONE +
bdma_chan->id].vector);
synchronize_irq(priv->msix[TSI721_VECT_DMA0_INT +
bdma_chan->id].vector);
} else
#endif
synchronize_irq(priv->pdev->irq);
tasklet_kill(&bdma_chan->tasklet);
spin_lock_bh(&bdma_chan->lock);
list_splice_init(&bdma_chan->free_list, &list);
spin_unlock_bh(&bdma_chan->lock);
tsi721_bdma_interrupt_enable(bdma_chan, 0);
#ifdef CONFIG_PCI_MSI
if (priv->flags & TSI721_USING_MSIX) {
free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
......@@ -790,6 +799,7 @@ int tsi721_register_dma(struct tsi721_device *priv)
bdma_chan->dchan.cookie = 1;
bdma_chan->dchan.chan_id = i;
bdma_chan->id = i;
bdma_chan->active = false;
spin_lock_init(&bdma_chan->lock);
......@@ -799,7 +809,6 @@ int tsi721_register_dma(struct tsi721_device *priv)
tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet,
(unsigned long)bdma_chan);
tasklet_disable(&bdma_chan->tasklet);
list_add_tail(&bdma_chan->dchan.device_node,
&mport->dma.channels);
}
......
......@@ -580,10 +580,12 @@ static int s3c_rtc_suspend(struct device *dev)
clk_enable(rtc_clk);
/* save TICNT for anyone using periodic interrupts */
ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT);
if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
ticnt_en_save = readw(s3c_rtc_base + S3C2410_RTCCON);
ticnt_en_save &= S3C64XX_RTCCON_TICEN;
ticnt_save = readl(s3c_rtc_base + S3C2410_TICNT);
} else {
ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT);
}
s3c_rtc_enable(pdev, 0);
......@@ -605,10 +607,15 @@ static int s3c_rtc_resume(struct device *dev)
clk_enable(rtc_clk);
s3c_rtc_enable(pdev, 1);
writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT);
if (s3c_rtc_cpu_type == TYPE_S3C64XX && ticnt_en_save) {
if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
writel(ticnt_save, s3c_rtc_base + S3C2410_TICNT);
if (ticnt_en_save) {
tmp = readw(s3c_rtc_base + S3C2410_RTCCON);
writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON);
writew(tmp | ticnt_en_save,
s3c_rtc_base + S3C2410_RTCCON);
}
} else {
writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT);
}
if (device_may_wakeup(dev) && wake_en) {
......
......@@ -186,12 +186,12 @@ static bool is_invalid_reserved_pfn(unsigned long pfn)
if (pfn_valid(pfn)) {
bool reserved;
struct page *tail = pfn_to_page(pfn);
struct page *head = compound_trans_head(tail);
struct page *head = compound_head(tail);
reserved = !!(PageReserved(head));
if (head != tail) {
/*
* "head" is not a dangling pointer
* (compound_trans_head takes care of that)
* (compound_head takes care of that)
* but the hugepage may have been split
* from under us (and we may not hold a
* reference count on the head page so it can
......
......@@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force)
int token;
if (!input)
return 0;
return 1;
while ((p = strsep(&input, ",")) != NULL) {
if (!*p)
......
......@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
*/
if (status < 0)
mlog_errno(status);
/*
* Clear dq_off so that we search for the structure in quota file next
* time we acquire it. The structure might be deleted and reallocated
* elsewhere by another node while our dquot structure is on freelist.
*/
dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_trans:
ocfs2_commit_trans(osb, handle);
......@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
status = ocfs2_qinfo_lock(info, 0);
if (status < 0)
goto out_dq;
/*
* We always want to read dquot structure from disk because we don't
* know what happened with it while it was on freelist.
*/
status = qtree_read_dquot(&info->dqi_gi, dquot);
ocfs2_qinfo_unlock(info, 0);
if (status < 0)
goto out_dq;
}
set_bit(DQ_READ_B, &dquot->dq_flags);
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
......
......@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
/* Clear the read bit so that next time someone uses this
* dquot he reads fresh info from disk and allocates local
* dquot structure */
clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
}
......
......@@ -121,9 +121,8 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
* to make sure a given page is a thp, not a non-huge compound page.
*/
else if (PageTransCompound(page) &&
(PageLRU(compound_trans_head(page)) ||
PageAnon(compound_trans_head(page))))
else if (PageTransCompound(page) && (PageLRU(compound_head(page)) ||
PageAnon(compound_head(page))))
u |= 1 << KPF_THP;
/*
......
......@@ -157,46 +157,6 @@ static inline int hpage_nr_pages(struct page *page)
return HPAGE_PMD_NR;
return 1;
}
/*
* compound_trans_head() should be used instead of compound_head(),
* whenever the "page" passed as parameter could be the tail of a
* transparent hugepage that could be undergoing a
* __split_huge_page_refcount(). The page structure layout often
* changes across releases and it makes extensive use of unions. So if
* the page structure layout will change in a way that
* page->first_page gets clobbered by __split_huge_page_refcount, the
* implementation making use of smp_rmb() will be required.
*
* Currently we define compound_trans_head as compound_head, because
* page->private is in the same union with page->first_page, and
* page->private isn't clobbered. However this also means we're
* currently leaving dirt into the page->private field of anonymous
* pages resulting from a THP split, instead of setting page->private
* to zero like for every other page that has PG_private not set. But
* anonymous pages don't use page->private so this is not a problem.
*/
#if 0
/* This will be needed if page->private will be clobbered in split_huge_page */
static inline struct page *compound_trans_head(struct page *page)
{
if (PageTail(page)) {
struct page *head;
head = page->first_page;
smp_rmb();
/*
* head may be a dangling pointer.
* __split_huge_page_refcount clears PageTail before
* overwriting first_page, so if PageTail is still
* there it means the head pointer isn't dangling.
*/
if (PageTail(page))
return head;
}
return page;
}
#else
#define compound_trans_head(page) compound_head(page)
#endif
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
......@@ -226,7 +186,6 @@ static inline int split_huge_page(struct page *page)
do { } while (0)
#define split_huge_page_pmd_mm(__mm, __address, __pmd) \
do { } while (0)
#define compound_trans_head(page) compound_head(page)
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
......
......@@ -175,7 +175,7 @@ extern unsigned int kobjsize(const void *objp);
* Special vmas that are non-mergable, non-mlock()able.
* Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
/*
* mapping from the currently active vm_flags protection bits (the
......@@ -399,8 +399,18 @@ static inline void compound_unlock_irqrestore(struct page *page,
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
return page->first_page;
if (unlikely(PageTail(page))) {
struct page *head = page->first_page;
/*
* page->first_page may be a dangling pointer to an old
* compound page, so recheck that it is still a tail
* page before returning.
*/
smp_rmb();
if (likely(PageTail(page)))
return head;
}
return page;
}
......@@ -757,7 +767,7 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
{
return xchg(&page->_last_cpupid, cpupid);
return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);
}
static inline int page_cpupid_last(struct page *page)
......@@ -766,7 +776,7 @@ static inline int page_cpupid_last(struct page *page)
}
static inline void page_cpupid_reset_last(struct page *page)
{
page->_last_cpupid = -1;
page->_last_cpupid = -1 & LAST_CPUPID_MASK;
}
#else
static inline int page_cpupid_last(struct page *page)
......
......@@ -424,111 +424,134 @@ void debug_dma_dump_mappings(struct device *dev)
EXPORT_SYMBOL(debug_dma_dump_mappings);
/*
* For each page mapped (initial page in the case of
* dma_alloc_coherent/dma_map_{single|page}, or each page in a
* scatterlist) insert into this tree using the pfn as the key. At
* For each mapping (initial cacheline in the case of
* dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
* scatterlist, or the cacheline specified in dma_map_single) insert
* into this tree using the cacheline as the key. At
* dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
* the pfn already exists at insertion time add a tag as a reference
* the entry already exists at insertion time add a tag as a reference
* count for the overlapping mappings. For now, the overlap tracking
* just ensures that 'unmaps' balance 'maps' before marking the pfn
* idle, but we should also be flagging overlaps as an API violation.
* just ensures that 'unmaps' balance 'maps' before marking the
* cacheline idle, but we should also be flagging overlaps as an API
* violation.
*
* Memory usage is mostly constrained by the maximum number of available
* dma-debug entries in that we need a free dma_debug_entry before
* inserting into the tree. In the case of dma_map_{single|page} and
* dma_alloc_coherent there is only one dma_debug_entry and one pfn to
* track per event. dma_map_sg(), on the other hand,
* consumes a single dma_debug_entry, but inserts 'nents' entries into
* the tree.
* inserting into the tree. In the case of dma_map_page and
* dma_alloc_coherent there is only one dma_debug_entry and one
* dma_active_cacheline entry to track per event. dma_map_sg(), on the
* other hand, consumes a single dma_debug_entry, but inserts 'nents'
* entries into the tree.
*
* At any time debug_dma_assert_idle() can be called to trigger a
* warning if the given page is in the active set.
* warning if any cachelines in the given page are in the active set.
*/
static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
static DEFINE_SPINLOCK(radix_lock);
#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
static int active_pfn_read_overlap(unsigned long pfn)
static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
{
return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
(entry->offset >> L1_CACHE_SHIFT);
}
static int active_cacheline_read_overlap(phys_addr_t cln)
{
int overlap = 0, i;
for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
overlap |= 1 << i;
return overlap;
}
static int active_pfn_set_overlap(unsigned long pfn, int overlap)
static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
{
int i;
if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
return overlap;
for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
if (overlap & 1 << i)
radix_tree_tag_set(&dma_active_pfn, pfn, i);
radix_tree_tag_set(&dma_active_cacheline, cln, i);
else
radix_tree_tag_clear(&dma_active_pfn, pfn, i);
radix_tree_tag_clear(&dma_active_cacheline, cln, i);
return overlap;
}
static void active_pfn_inc_overlap(unsigned long pfn)
static void active_cacheline_inc_overlap(phys_addr_t cln)
{
int overlap = active_pfn_read_overlap(pfn);
int overlap = active_cacheline_read_overlap(cln);
overlap = active_pfn_set_overlap(pfn, ++overlap);
overlap = active_cacheline_set_overlap(cln, ++overlap);
/* If we overflowed the overlap counter then we're potentially
* leaking dma-mappings. Otherwise, if maps and unmaps are
* balanced then this overflow may cause false negatives in
* debug_dma_assert_idle() as the pfn may be marked idle
* debug_dma_assert_idle() as the cacheline may be marked idle
* prematurely.
*/
WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP,
"DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
ACTIVE_PFN_MAX_OVERLAP, pfn);
WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
"DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
static int active_pfn_dec_overlap(unsigned long pfn)
static int active_cacheline_dec_overlap(phys_addr_t cln)
{
int overlap = active_pfn_read_overlap(pfn);
int overlap = active_cacheline_read_overlap(cln);
return active_pfn_set_overlap(pfn, --overlap);
return active_cacheline_set_overlap(cln, --overlap);
}
static int active_pfn_insert(struct dma_debug_entry *entry)
static int active_cacheline_insert(struct dma_debug_entry *entry)
{
phys_addr_t cln = to_cacheline_number(entry);
unsigned long flags;
int rc;
/* If the device is not writing memory then we don't have any
* concerns about the cpu consuming stale data. This mitigates
* legitimate usages of overlapping mappings.
*/
if (entry->direction == DMA_TO_DEVICE)
return 0;
spin_lock_irqsave(&radix_lock, flags);
rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
if (rc == -EEXIST)
active_pfn_inc_overlap(entry->pfn);
active_cacheline_inc_overlap(cln);
spin_unlock_irqrestore(&radix_lock, flags);
return rc;
}
static void active_pfn_remove(struct dma_debug_entry *entry)
static void active_cacheline_remove(struct dma_debug_entry *entry)
{
phys_addr_t cln = to_cacheline_number(entry);
unsigned long flags;
/* ...mirror the insert case */
if (entry->direction == DMA_TO_DEVICE)
return;
spin_lock_irqsave(&radix_lock, flags);
/* since we are counting overlaps the final put of the
* entry->pfn will occur when the overlap count is 0.
* active_pfn_dec_overlap() returns -1 in that case
* cacheline will occur when the overlap count is 0.
* active_cacheline_dec_overlap() returns -1 in that case
*/
if (active_pfn_dec_overlap(entry->pfn) < 0)
radix_tree_delete(&dma_active_pfn, entry->pfn);
if (active_cacheline_dec_overlap(cln) < 0)
radix_tree_delete(&dma_active_cacheline, cln);
spin_unlock_irqrestore(&radix_lock, flags);
}
/**
* debug_dma_assert_idle() - assert that a page is not undergoing dma
* @page: page to lookup in the dma_active_pfn tree
* @page: page to lookup in the dma_active_cacheline tree
*
* Place a call to this routine in cases where the cpu touching the page
* before the dma completes (page is dma_unmapped) will lead to data
......@@ -536,22 +559,38 @@ static void active_pfn_remove(struct dma_debug_entry *entry)
*/
void debug_dma_assert_idle(struct page *page)
{
static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
struct dma_debug_entry *entry = NULL;
void **results = (void **) &ents;
unsigned int nents, i;
unsigned long flags;
struct dma_debug_entry *entry;
phys_addr_t cln;
if (!page)
return;
cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
spin_lock_irqsave(&radix_lock, flags);
entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
CACHELINES_PER_PAGE);
for (i = 0; i < nents; i++) {
phys_addr_t ent_cln = to_cacheline_number(ents[i]);
if (ent_cln == cln) {
entry = ents[i];
break;
} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
break;
}
spin_unlock_irqrestore(&radix_lock, flags);
if (!entry)
return;
cln = to_cacheline_number(entry);
err_printk(entry->dev, entry,
"DMA-API: cpu touching an active dma mapped page "
"[pfn=0x%lx]\n", entry->pfn);
"DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
&cln);
}
/*
......@@ -568,9 +607,9 @@ static void add_dma_entry(struct dma_debug_entry *entry)
hash_bucket_add(bucket, entry);
put_hash_bucket(bucket, &flags);
rc = active_pfn_insert(entry);
rc = active_cacheline_insert(entry);
if (rc == -ENOMEM) {
pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
}
......@@ -631,7 +670,7 @@ static void dma_entry_free(struct dma_debug_entry *entry)
{
unsigned long flags;
active_pfn_remove(entry);
active_cacheline_remove(entry);
/*
* add to beginning of the list - this way the entries are
......
......@@ -1253,8 +1253,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
node = indirect_to_ptr(node);
max_index = radix_tree_maxindex(node->height);
if (cur_index > max_index)
if (cur_index > max_index) {
rcu_read_unlock();
break;
}
cur_index = __locate(node, item, cur_index, &found_index);
rcu_read_unlock();
......
......@@ -1961,7 +1961,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
return ret;
}
#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
......
......@@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *rmap_item)
static struct page *page_trans_compound_anon(struct page *page)
{
if (PageTransCompound(page)) {
struct page *head = compound_trans_head(page);
struct page *head = compound_head(page);
/*
* head may actually be splitted and freed from under
* us but it's ok here.
......
......@@ -1127,8 +1127,8 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
* skipping css reference should be safe.
*/
if (next_css) {
if ((next_css->flags & CSS_ONLINE) &&
(next_css == &root->css || css_tryget(next_css)))
if ((next_css == &root->css) ||
((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
return mem_cgroup_from_css(next_css);
prev_css = next_css;
......@@ -6595,6 +6595,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup_event *event, *tmp;
struct cgroup_subsys_state *iter;
/*
* Unregister events and notify userspace.
......@@ -6611,7 +6612,14 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
kmem_cgroup_css_offline(memcg);
mem_cgroup_invalidate_reclaim_iterators(memcg);
mem_cgroup_reparent_charges(memcg);
/*
* This requires that offlining is serialized. Right now that is
* guaranteed because css_killed_work_fn() holds the cgroup_mutex.
*/
css_for_each_descendant_post(iter, css)
mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
mem_cgroup_destroy_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}
......
......@@ -1651,7 +1651,7 @@ int soft_offline_page(struct page *page, int flags)
{
int ret;
unsigned long pfn = page_to_pfn(page);
struct page *hpage = compound_trans_head(page);
struct page *hpage = compound_head(page);
if (PageHWPoison(page)) {
pr_info("soft offline: %#lx page already poisoned\n", pfn);
......
......@@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order)
__SetPageHead(page);
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
__SetPageTail(p);
set_page_count(p, 0);
p->first_page = page;
/* Make sure p->first_page is always valid for PageTail() */
smp_wmb();
__SetPageTail(p);
}
}
......@@ -1236,6 +1238,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
}
local_irq_restore(flags);
}
static bool gfp_thisnode_allocation(gfp_t gfp_mask)
{
return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
}
#else
static bool gfp_thisnode_allocation(gfp_t gfp_mask)
{
return false;
}
#endif
/*
......@@ -1572,7 +1583,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
get_pageblock_migratetype(page));
}
/*
* NOTE: GFP_THISNODE allocations do not partake in the kswapd
* aging protocol, so they can't be fair.
*/
if (!gfp_thisnode_allocation(gfp_flags))
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
......@@ -1944,8 +1961,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
* ultimately fall back to remote zones that do not
* partake in the fairness round-robin cycle of this
* zonelist.
*
* NOTE: GFP_THISNODE allocations do not partake in
* the kswapd aging protocol, so they can't be fair.
*/
if (alloc_flags & ALLOC_WMARK_LOW) {
if ((alloc_flags & ALLOC_WMARK_LOW) &&
!gfp_thisnode_allocation(gfp_mask)) {
if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
continue;
if (!zone_local(preferred_zone, zone))
......@@ -2501,8 +2522,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* allowed per node queues are empty and that nodes are
* over allocated.
*/
if (IS_ENABLED(CONFIG_NUMA) &&
(gfp_mask & GFP_THISNODE) == GFP_THISNODE)
if (gfp_thisnode_allocation(gfp_mask))
goto nopage;
restart:
......
......@@ -98,7 +98,7 @@ static void put_compound_page(struct page *page)
}
/* __split_huge_page_refcount can run under us */
page_head = compound_trans_head(page);
page_head = compound_head(page);
/*
* THP can not break up slab pages so avoid taking
......@@ -253,7 +253,7 @@ bool __get_page_tail(struct page *page)
*/
unsigned long flags;
bool got;
struct page *page_head = compound_trans_head(page);
struct page *page_head = compound_head(page);
/* Ref to put_compound_page() comment. */
if (!__compound_tail_refcounted(page_head)) {
......
......@@ -257,7 +257,7 @@ case "$arg" in
&& compr="lzop -9 -f"
echo "$output_file" | grep -q "\.lz4$" \
&& [ -x "`which lz4 2> /dev/null`" ] \
&& compr="lz4 -9 -f"
&& compr="lz4 -l -9 -f"
echo "$output_file" | grep -q "\.cpio$" && compr="cat"
shift
;;
......
......@@ -330,8 +330,7 @@ static void write_src(void)
printf("\tPTR\t_text + %#llx\n",
table[i].addr - _text);
else
printf("\tPTR\t_text - %#llx\n",
_text - table[i].addr);
printf("\tPTR\t%#llx\n", table[i].addr);
} else {
printf("\tPTR\t%#llx\n", table[i].addr);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment