Commit 41d4a4f8 authored by Kirill Smelkov's avatar Kirill Smelkov

bigfile/virtmem: Do storeblk() with virtmem lock released

to avoid deadlocks.

Description is in the last patch.

Fixes: #6

/cc @Tyagov, @klaus, @jm
/reviewed-on !2
parents e73e22ea fb4bfb32
...@@ -319,8 +319,8 @@ PyFunc(pyfileh_isdirty, "isdirty() - are there any changes to fileh memory at al ...@@ -319,8 +319,8 @@ PyFunc(pyfileh_isdirty, "isdirty() - are there any changes to fileh memory at al
if (!PyArg_ParseTuple(args, "")) if (!PyArg_ParseTuple(args, ""))
return NULL; return NULL;
/* NOTE not strictly neccessary to virt_lock() for reading ->dirty */ /* NOTE not strictly neccessary to virt_lock() for checking ->dirty_pages not empty */
return PyBool_FromLong(pyfileh->dirty); return PyBool_FromLong(!list_empty(&pyfileh->dirty_pages));
} }
......
...@@ -51,6 +51,7 @@ Page *ramh_alloc_page(RAMH *ramh, pgoff_t pgoffset_hint) ...@@ -51,6 +51,7 @@ Page *ramh_alloc_page(RAMH *ramh, pgoff_t pgoffset_hint)
page->ramh = ramh; page->ramh = ramh;
page->ramh_pgoffset = ramh_pgoffset; page->ramh_pgoffset = ramh_pgoffset;
INIT_LIST_HEAD(&page->lru); /* NOTE ->lru left unlinked */ INIT_LIST_HEAD(&page->lru); /* NOTE ->lru left unlinked */
INIT_LIST_HEAD(&page->in_dirty); /* initially not in dirty list */
page->refcnt = 0; page->refcnt = 0;
return page; return page;
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# #
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
from wendelin.bigfile import BigFile from wendelin.bigfile import BigFile, WRITEOUT_STORE
from threading import Thread, Lock from threading import Thread, Lock
from time import sleep from time import sleep
...@@ -68,7 +68,17 @@ PS = 2*MB ...@@ -68,7 +68,17 @@ PS = 2*MB
# V -> loadblk # V -> loadblk
# Z <- ClientStorage.invalidateTransaction() # Z <- ClientStorage.invalidateTransaction()
# Z -> zeo.load # Z -> zeo.load
# V <- fileh_invalidate_page # V <- fileh_invalidate_page (possibly of unrelated page)
#
# --------
# and similarly for storeblk:
#
# T1 T2
#
# commit same as ^^^
# V -> storeblk
#
# Z -> zeo.store
def test_thread_lock_vs_virtmem_lock(): def test_thread_lock_vs_virtmem_lock():
Z = Lock() Z = Lock()
c12 = NotifyChannel() # T1 -> T2 c12 = NotifyChannel() # T1 -> T2
...@@ -77,14 +87,12 @@ def test_thread_lock_vs_virtmem_lock(): ...@@ -77,14 +87,12 @@ def test_thread_lock_vs_virtmem_lock():
class ZLockBigFile(BigFile): class ZLockBigFile(BigFile):
def __new__(cls, blksize): def __new__(cls, blksize):
obj = BigFile.__new__(cls, blksize) obj = BigFile.__new__(cls, blksize)
obj.cycle = 0
return obj return obj
def loadblk(self, blk, buf): def Zsync_and_lockunlock(self):
tell, wait = c12.tell, c21.wait tell, wait = c12.tell, c21.wait
# on the first cycle we synchronize with invalidate in T2 # synchronize with invalidate in T2
if self.cycle == 0:
tell('T1-V-under') tell('T1-V-under')
wait('T2-Z-taken') wait('T2-Z-taken')
...@@ -93,27 +101,42 @@ def test_thread_lock_vs_virtmem_lock(): ...@@ -93,27 +101,42 @@ def test_thread_lock_vs_virtmem_lock():
Z.acquire() Z.acquire()
Z.release() Z.release()
self.cycle += 1 def loadblk(self, blk, buf):
self.Zsync_and_lockunlock()
def storeblk(self, blk, buf):
self.Zsync_and_lockunlock()
f = ZLockBigFile(PS) f = ZLockBigFile(PS)
fh = f.fileh_open() fh = f.fileh_open()
fh2 = f.fileh_open()
vma = fh.mmap(0, 1) vma = fh.mmap(0, 1)
m = memoryview(vma) m = memoryview(vma)
def T1(): def T1():
m[0] # calls ZLockBigFile.loadblk() m[0] # calls ZLockBigFile.loadblk()
tell, wait = c12.tell, c21.wait
wait('T2-Z-released')
m[0] = bord_py3(b'1') # make page dirty
fh.dirty_writeout(WRITEOUT_STORE) # calls ZLockBigFile.storeblk()
def T2(): def T2():
tell, wait = c21.tell, c12.wait tell, wait = c21.tell, c12.wait
# cycle 0: vs loadblk in T0
# cycle 1: vs storeblk in T0
for _ in range(2):
wait('T1-V-under') wait('T1-V-under')
Z.acquire() Z.acquire()
tell('T2-Z-taken') tell('T2-Z-taken')
fh.invalidate_page(0) fh2.invalidate_page(0) # NOTE invalidating page _not_ of fh
Z.release() Z.release()
tell('T2-Z-released')
t1, t2 = Thread(target=T1), Thread(target=T2) t1, t2 = Thread(target=T1), Thread(target=T2)
t1.start(); t2.start() t1.start(); t2.start()
...@@ -185,7 +208,7 @@ def test_thread_multiaccess_parallel(): ...@@ -185,7 +208,7 @@ def test_thread_multiaccess_parallel():
t1.join(); t2.join() t1.join(); t2.join()
# loading vs invalidate in another thread # loading vs invalidate of same page in another thread
def test_thread_load_vs_invalidate(): def test_thread_load_vs_invalidate():
c12 = NotifyChannel() # T1 -> T2 c12 = NotifyChannel() # T1 -> T2
c21 = NotifyChannel() # T2 -> T1 c21 = NotifyChannel() # T2 -> T1
......
This diff is collapsed.
...@@ -149,6 +149,8 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram) ...@@ -149,6 +149,8 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram)
fileh->file = file; fileh->file = file;
INIT_LIST_HEAD(&fileh->mmaps); INIT_LIST_HEAD(&fileh->mmaps);
INIT_LIST_HEAD(&fileh->dirty_pages);
fileh->writeout_inprogress = 0;
pagemap_init(&fileh->pagemap, ilog2_exact(ram->pagesize)); pagemap_init(&fileh->pagemap, ilog2_exact(ram->pagesize));
out: out:
...@@ -171,6 +173,9 @@ void fileh_close(BigFileH *fileh) ...@@ -171,6 +173,9 @@ void fileh_close(BigFileH *fileh)
// fileh, but mapping exists - real fileh release is delayed to last unmap ? // fileh, but mapping exists - real fileh release is delayed to last unmap ?
BUG_ON(!list_empty(&fileh->mmaps)); BUG_ON(!list_empty(&fileh->mmaps));
/* it's an error to close fileh while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
/* drop all pages (dirty or not) associated with this fileh */ /* drop all pages (dirty or not) associated with this fileh */
pagemap_for_each(page, &fileh->pagemap) { pagemap_for_each(page, &fileh->pagemap) {
/* it's an error to close fileh to mapping of which an access is /* it's an error to close fileh to mapping of which an access is
...@@ -182,6 +187,8 @@ void fileh_close(BigFileH *fileh) ...@@ -182,6 +187,8 @@ void fileh_close(BigFileH *fileh)
free(page); free(page);
} }
BUG_ON(!list_empty(&fileh->dirty_pages));
/* and clear pagemap */ /* and clear pagemap */
pagemap_clear(&fileh->pagemap); pagemap_clear(&fileh->pagemap);
...@@ -296,11 +303,24 @@ void vma_unmap(VMA *vma) ...@@ -296,11 +303,24 @@ void vma_unmap(VMA *vma)
* WRITEOUT / DISCARD * * WRITEOUT / DISCARD *
**********************/ **********************/
/* helper for sorting dirty pages by ->f_pgoffset */
static int hpage_indirty_cmp_bypgoffset(struct list_head *hpage1, struct list_head *hpage2, void *_)
{
Page *page1 = list_entry(hpage1, typeof(*page1), in_dirty);
Page *page2 = list_entry(hpage2, typeof(*page2), in_dirty);
if (page1->f_pgoffset < page2->f_pgoffset)
return -1;
if (page1->f_pgoffset > page2->f_pgoffset)
return +1;
return 0;
}
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags) int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
{ {
Page *page; Page *page;
BigFile *file = fileh->file; BigFile *file = fileh->file;
struct list_head *hmmap; struct list_head *hpage, *hpage_next, *hmmap;
sigset_t save_sigset; sigset_t save_sigset;
int err = 0; int err = 0;
...@@ -312,12 +332,18 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags) ...@@ -312,12 +332,18 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
sigsegv_block(&save_sigset); sigsegv_block(&save_sigset);
virt_lock(); virt_lock();
/* concurrent writeouts are not allowed */
BUG_ON(fileh->writeout_inprogress);
fileh->writeout_inprogress = 1;
/* pages are stored (if stored) in sorted order */
if (flags & WRITEOUT_STORE)
list_sort(&fileh->dirty_pages, hpage_indirty_cmp_bypgoffset, NULL);
/* write out dirty pages */ /* write out dirty pages */
pagemap_for_each(page, &fileh->pagemap) { list_for_each_safe(hpage, hpage_next, &fileh->dirty_pages) {
/* XXX we scan whole file pages which could be slow page = list_entry(hpage, typeof(*page), in_dirty);
* TODO -> maintain something like separate dirty_list ? */ BUG_ON(page->state != PAGE_DIRTY);
if (page->state != PAGE_DIRTY)
continue;
/* ->storeblk() */ /* ->storeblk() */
if (flags & WRITEOUT_STORE) { if (flags & WRITEOUT_STORE) {
...@@ -325,34 +351,28 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags) ...@@ -325,34 +351,28 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
blk_t blk = page->f_pgoffset; // NOTE assumes blksize = pagesize blk_t blk = page->f_pgoffset; // NOTE assumes blksize = pagesize
void *pagebuf; void *pagebuf;
int mapped_tmp = 0;
if (!page->refcnt) { /* mmap page temporarily somewhere
/* page not mmaped anywhere - mmap it temporarily somewhere */ *
* ( we cannot use present page mapping in some vma directly,
* because while storeblk is called with virtmem lock released that
* mapping can go away ) */
pagebuf = page_mmap(page, NULL, PROT_READ); pagebuf = page_mmap(page, NULL, PROT_READ);
TODO(!pagebuf); // XXX err TODO(!pagebuf); // XXX err
mapped_tmp = 1;
}
else { /* unlock virtmem before calling storeblk()
/* some vma mmaps page - use that memory directly */ *
* that call is potentially slow and external code can take other
/* XXX this assumes there is small #vma and is ugly - in general it * locks. If that "other locks" are also taken before external code
* should be simpler via back-pointers from page? */ * calls e.g. fileh_invalidate_page() in different codepath a deadlock
pagebuf = NULL; * can happen. (similar to loadblk case) */
list_for_each(hmmap, &fileh->mmaps) { virt_unlock();
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
if (vma_page_ismapped(vma, page)) {
pagebuf = vma_page_addr(vma, page);
break;
}
}
BUG_ON(!pagebuf);
}
err = file->file_ops->storeblk(file, blk, pagebuf); err = file->file_ops->storeblk(file, blk, pagebuf);
if (mapped_tmp) /* relock virtmem */
virt_lock();
xmunmap(pagebuf, page_size(page)); xmunmap(pagebuf, page_size(page));
if (err) if (err)
...@@ -362,7 +382,7 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags) ...@@ -362,7 +382,7 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
/* page.state -> PAGE_LOADED and correct mappings RW -> R */ /* page.state -> PAGE_LOADED and correct mappings RW -> R */
if (flags & WRITEOUT_MARKSTORED) { if (flags & WRITEOUT_MARKSTORED) {
page->state = PAGE_LOADED; page->state = PAGE_LOADED;
fileh->dirty--; list_del_init(&page->in_dirty);
list_for_each(hmmap, &fileh->mmaps) { list_for_each(hmmap, &fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh); VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
...@@ -375,7 +395,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags) ...@@ -375,7 +395,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
/* if we successfully finished with markstored flag set - all dirty pages /* if we successfully finished with markstored flag set - all dirty pages
* should become non-dirty */ * should become non-dirty */
if (flags & WRITEOUT_MARKSTORED) if (flags & WRITEOUT_MARKSTORED)
BUG_ON(fileh->dirty); BUG_ON(!list_empty(&fileh->dirty_pages));
fileh->writeout_inprogress = 0;
out: out:
virt_unlock(); virt_unlock();
...@@ -387,18 +409,23 @@ out: ...@@ -387,18 +409,23 @@ out:
void fileh_dirty_discard(BigFileH *fileh) void fileh_dirty_discard(BigFileH *fileh)
{ {
Page *page; Page *page;
struct list_head *hpage, *hpage_next;
sigset_t save_sigset; sigset_t save_sigset;
sigsegv_block(&save_sigset); sigsegv_block(&save_sigset);
virt_lock(); virt_lock();
/* XXX we scan whole file pages which could be slow /* discard is not allowed to run in parallel to writeout */
* TODO -> maintain something like separate dirty_list ? */ BUG_ON(fileh->writeout_inprogress);
pagemap_for_each(page, &fileh->pagemap)
if (page->state == PAGE_DIRTY) list_for_each_safe(hpage, hpage_next, &fileh->dirty_pages) {
page = list_entry(hpage, typeof(*page), in_dirty);
BUG_ON(page->state != PAGE_DIRTY);
page_drop_memory(page); page_drop_memory(page);
}
BUG_ON(fileh->dirty); BUG_ON(!list_empty(&fileh->dirty_pages));
virt_unlock(); virt_unlock();
sigsegv_restore(&save_sigset); sigsegv_restore(&save_sigset);
...@@ -417,6 +444,9 @@ void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset) ...@@ -417,6 +444,9 @@ void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset)
sigsegv_block(&save_sigset); sigsegv_block(&save_sigset);
virt_lock(); virt_lock();
/* it's an error to invalidate fileh while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
page = pagemap_get(&fileh->pagemap, pgoffset); page = pagemap_get(&fileh->pagemap, pgoffset);
if (page) { if (page) {
/* for pages where loading is in progress, we just remove the page from /* for pages where loading is in progress, we just remove the page from
...@@ -639,7 +669,7 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write) ...@@ -639,7 +669,7 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write)
* that call is potentially slow and external code can take other * that call is potentially slow and external code can take other
* locks. If that "other locks" are also taken before external code * locks. If that "other locks" are also taken before external code
* calls e.g. fileh_invalidate_page() in different codepath a deadlock * calls e.g. fileh_invalidate_page() in different codepath a deadlock
* can happen. */ * can happen. (similar to storeblk case) */
page->state = PAGE_LOADING; page->state = PAGE_LOADING;
virt_unlock(); virt_unlock();
...@@ -721,8 +751,12 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write) ...@@ -721,8 +751,12 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write)
} }
// XXX also call page->markdirty() ? // XXX also call page->markdirty() ?
if (newstate == PAGE_DIRTY && newstate != page->state) if (newstate == PAGE_DIRTY && newstate != page->state) {
fileh->dirty++; /* it is not allowed to modify pages while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
list_add_tail(&page->in_dirty, &fileh->dirty_pages);
}
page->state = max(page->state, newstate); page->state = max(page->state, newstate);
/* mark page as used recently */ /* mark page as used recently */
...@@ -838,6 +872,8 @@ static void page_drop_memory(Page *page) ...@@ -838,6 +872,8 @@ static void page_drop_memory(Page *page)
/* NOTE we try not to drop memory for loading-in-progress pages. /* NOTE we try not to drop memory for loading-in-progress pages.
* so if this is called for such a page - it is a bug. */ * so if this is called for such a page - it is a bug. */
BUG_ON(page->state == PAGE_LOADING); BUG_ON(page->state == PAGE_LOADING);
/* same for storing-in-progress */
BUG_ON(page->fileh->writeout_inprogress && page->state == PAGE_DIRTY);
if (page->state == PAGE_EMPTY) if (page->state == PAGE_EMPTY)
return; return;
...@@ -850,7 +886,7 @@ static void page_drop_memory(Page *page) ...@@ -850,7 +886,7 @@ static void page_drop_memory(Page *page)
/* 2) release memory to ram */ /* 2) release memory to ram */
ramh_drop_memory(page->ramh, page->ramh_pgoffset); ramh_drop_memory(page->ramh, page->ramh_pgoffset);
if (page->state == PAGE_DIRTY) if (page->state == PAGE_DIRTY)
page->fileh->dirty--; list_del_init(&page->in_dirty);
page->state = PAGE_EMPTY; page->state = PAGE_EMPTY;
// XXX touch lru? // XXX touch lru?
......
...@@ -65,10 +65,11 @@ struct BigFileH { ...@@ -65,10 +65,11 @@ struct BigFileH {
PageMap pagemap; PageMap pagemap;
// XXX not sure we need this /* fileh dirty pages */
// -> currently is used to know whether to join ZODB DataManager serving ZBigFile struct list_head dirty_pages; /* _ -> page->in_dirty */
// XXX maybe change into dirty_list in the future?
unsigned dirty; /* whether writeout is currently in progress */
int writeout_inprogress;
}; };
typedef struct BigFileH BigFileH; typedef struct BigFileH BigFileH;
...@@ -99,6 +100,9 @@ struct Page { ...@@ -99,6 +100,9 @@ struct Page {
/* in recently-used pages for ramh->ram (ram->lru_list -> _) */ /* in recently-used pages for ramh->ram (ram->lru_list -> _) */
struct list_head lru; struct list_head lru;
/* in dirty pages for fileh (fileh->dirty_pages -> _) */
struct list_head in_dirty;
int refcnt; /* each mapping in a vma counts here */ int refcnt; /* each mapping in a vma counts here */
}; };
typedef struct Page Page; typedef struct Page Page;
...@@ -152,6 +156,7 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram); ...@@ -152,6 +156,7 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);
/* close fileh /* close fileh
* *
* it's an error to call fileh_close with existing mappings * it's an error to call fileh_close with existing mappings
* it's an error to call fileh_close while writeout for fileh is in progress
*/ */
void fileh_close(BigFileH *fileh); void fileh_close(BigFileH *fileh);
...@@ -204,6 +209,12 @@ enum WriteoutFlags { ...@@ -204,6 +209,12 @@ enum WriteoutFlags {
* *
* No guarantee is made about atomicity - e.g. if this call fails, some * No guarantee is made about atomicity - e.g. if this call fails, some
* pages could be written and some left in memory in dirty state. * pages could be written and some left in memory in dirty state.
*
* it's an error for a given fileh to call several fileh_dirty_writeout() in
* parallel.
*
* it's an error for a given fileh to modify its pages while writeout is in
* progress: until fileh_dirty_writeout(... | WRITEOUT_STORE) has finished.
*/ */
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags); int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
...@@ -215,6 +226,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags); ...@@ -215,6 +226,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
* - it is unmapped from all mmaps; * - it is unmapped from all mmaps;
* - its content is discarded; * - its content is discarded;
* - its backing memory is released to OS. * - its backing memory is released to OS.
*
* it's an error for a given fileh to call fileh_dirty_discard() while writeout
* is in progress.
*/ */
void fileh_dirty_discard(BigFileH *fileh); void fileh_dirty_discard(BigFileH *fileh);
...@@ -229,6 +243,9 @@ void fileh_dirty_discard(BigFileH *fileh); ...@@ -229,6 +243,9 @@ void fileh_dirty_discard(BigFileH *fileh);
* *
* ( Such invalidation is needed to synchronize fileh memory, when we know a * ( Such invalidation is needed to synchronize fileh memory, when we know a
* file was changed externally ) * file was changed externally )
*
* it's an error to call fileh_invalidate_page() while writeout for fileh is in
* progress.
*/ */
void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset); void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment