Commit 986cf86e authored by Kirill Smelkov

wcfs: client: Provide virtmem integration

Provide integration with virtmem, so that a WCFS Mapping can be associated
with and managed under a virtmem VMA. In other words, provide support so that
WCFS can be used as ZBigFile backend in "mmap overlay" mode (see fae045cc
"bigfile/virtmem: Introduce "mmap overlay" mode" for description of mmap-overlay mode).

We'll need this functionality for ZBigFile + WCFS client integration.

Virtmem integration will be tested by running the whole wendelin.core functional
testsuite in wcfs-mode after the next patch.

Quoting added description:

---- 8< ----

Integration with wendelin.core virtmem layer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This client package can be used standalone, but additionally provides
integration with wendelin.core userspace virtual memory manager: when a
Mapping is created, it can be associated as serving base layer for a
particular virtmem VMA via FileH.mmap(vma=...). In that case, since virtmem
itself adds another layer of dirty pages over read-only base provided by
Mapping(+)

                 ┌──┐                      ┌──┐
                 │RW│                      │RW│    ← virtmem VMA dirty pages
                 └──┘                      └──┘
                           +
                                                   VMA base = X@at view provided by Mapping:

                                          ___        /@revA/bigfile/X
        __                                           /@revB/bigfile/X
               _                                     /@revC/bigfile/X
                           +                         ...
     ───  ───── ──────────────────────────   ─────   /head/bigfile/X

the Mapping will interact with virtmem layer to coordinate
updates to mapping virtual memory.

How it works
~~~~~~~~~~~~

Wcfs client integrates with virtmem layer so that virtmem can handle
dirtying pages of the read-only base layer that wcfs client provides via
isolated Mapping. For wcfs-backed bigfiles every virtmem VMA is interlinked
with a Mapping:

      VMA     -> BigFileH -> ZBigFile -----> Z
       ↑↓                                    O
     Mapping  -> FileH    -> wcfs server --> DB

When a page is write-accessed, virtmem mmaps in a page of RAM in place of
accessed virtual memory, copies base-layer content provided by Mapping into
there, and marks that page as read-write.

Upon receiving a pin message, the pinner asks virtmem whether the
corresponding page was already dirtied in virtmem's BigFileH (call to
__fileh_page_isdirty). If it was, the pinner does not remmap that part of
the Mapping to wcfs/@revX/f and just leaves the dirty page in place,
remembering pin information in fileh._pinned.

Once dirty pages are no longer needed (either after discard/abort or
writeout/commit), virtmem asks wcfs client to remmap the corresponding regions
of the Mapping back in their place via calls to Mapping.remmap_blk for
previously dirtied blocks.

The scheme outlined above does not need to split Mapping upon dirtying an
inner page.

See bigfile_ops interface (wendelin/bigfile/file.h) that explains base-layer
and overlaying from virtmem point of view. For wcfs this interface is
provided by small wcfs client wrapper in bigfile/file_zodb.cpp.

(+) see bigfile_ops interface (wendelin/bigfile/file.h) that gives virtmem
    point of view on layering.

----------------------------------------

Some preliminary history:

kirr/wendelin.core@f330bd2f    X wcfs/client: Overview += interaction with virtmem layer
parent e11edc70
...@@ -60,8 +60,7 @@ static int __ram_reclaim(RAM *ram); ...@@ -60,8 +60,7 @@ static int __ram_reclaim(RAM *ram);
/* global lock which protects manipulating virtmem data structures /* global lock which protects manipulating virtmem data structures
* *
* NOTE not scalable, but this is temporary solution - as we are going to move * NOTE not scalable. */
* memory management back into the kernel, where it is done properly. */
static pthread_mutex_t virtmem_lock = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; static pthread_mutex_t virtmem_lock = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
static const VirtGilHooks *virtmem_gilhooks; static const VirtGilHooks *virtmem_gilhooks;
......
...@@ -94,7 +94,7 @@ struct bigfile_ops { ...@@ -94,7 +94,7 @@ struct bigfile_ops {
* dirtied pages that are layed over base data layer provided by the * dirtied pages that are layed over base data layer provided by the
* mappings. * mappings.
* *
* The primary user of this functionality will be wcfs - virtual filesystem that * The primary user of this functionality is wcfs - virtual filesystem that
* provides access to ZBigFile data via OS-level files(*). The layering can * provides access to ZBigFile data via OS-level files(*). The layering can
* be schematically depicted as follows * be schematically depicted as follows
* *
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
* are dirtied. The mode in which BigFile handle is opened is specified via * are dirtied. The mode in which BigFile handle is opened is specified via
* fileh_open(flags=...). * fileh_open(flags=...).
* *
* The primary user of "mmap overlay" functionality will be wcfs - virtual * The primary user of "mmap overlay" functionality is wcfs - virtual
* filesystem that provides access to ZBigFile data via OS-level files(*). * filesystem that provides access to ZBigFile data via OS-level files(*).
* *
* (*) see wcfs/client/wcfs.h and wcfs/wcfs.go * (*) see wcfs/client/wcfs.h and wcfs/wcfs.go
...@@ -171,7 +171,7 @@ struct VMA { ...@@ -171,7 +171,7 @@ struct VMA {
* MMAP_OVERLAY flag. bigfile_ops.mmap_setup_read can initialize this to * MMAP_OVERLAY flag. bigfile_ops.mmap_setup_read can initialize this to
* object pointer specific to serving created base overlay mapping. * object pointer specific to serving created base overlay mapping.
* *
* For example WCFS will use this to link VMA -> wcfs.Mapping to know which * For example WCFS uses this to link VMA -> wcfs.Mapping to know which
* wcfs-specific mapping is serving particular virtmem VMA. * wcfs-specific mapping is serving particular virtmem VMA.
* *
* NULL for VMAs created from under DONT_MMAP_OVERLAY fileh. */ * NULL for VMAs created from under DONT_MMAP_OVERLAY fileh. */
......
...@@ -318,7 +318,8 @@ setup( ...@@ -318,7 +318,8 @@ setup(
['wcfs/client/wcfs.cpp', ['wcfs/client/wcfs.cpp',
'wcfs/client/wcfs_watchlink.cpp', 'wcfs/client/wcfs_watchlink.cpp',
'wcfs/client/wcfs_misc.cpp'], 'wcfs/client/wcfs_misc.cpp'],
depends = libwcfs_h)], depends = libvirtmem_h + libwcfs_h,
dsos = ['wendelin.bigfile.libvirtmem'])],
ext_modules = [ ext_modules = [
PyGoExt('wendelin.bigfile._bigfile', PyGoExt('wendelin.bigfile._bigfile',
...@@ -333,14 +334,14 @@ setup( ...@@ -333,14 +334,14 @@ setup(
PyGoExt('wendelin.wcfs.client._wcfs', PyGoExt('wendelin.wcfs.client._wcfs',
['wcfs/client/_wcfs.pyx'], ['wcfs/client/_wcfs.pyx'],
depends = libwcfs_h, depends = libwcfs_h + libvirtmem_h,
dsos = ['wendelin.wcfs.client.libwcfs']), dsos = ['wendelin.wcfs.client.libwcfs']),
PyGoExt('wendelin.wcfs.client._wczsync', PyGoExt('wendelin.wcfs.client._wczsync',
['wcfs/client/_wczsync.pyx'], ['wcfs/client/_wczsync.pyx'],
depends = [ depends = [
'wcfs/client/_wcfs.pxd', 'wcfs/client/_wcfs.pxd',
] + libwcfs_h, ] + libwcfs_h + libvirtmem_h,
dsos = ['wendelin.wcfs.client.libwcfs']), dsos = ['wendelin.wcfs.client.libwcfs']),
PyGoExt('wendelin.wcfs.internal.wcfs_test', PyGoExt('wendelin.wcfs.internal.wcfs_test',
......
This diff is collapsed.
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
// See COPYING file for full licensing terms. // See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options. // See https://www.nexedi.com/licensing for rationale and options.
// Package wcfs provides WCFS client. // Package wcfs provides WCFS client integrated with user-space virtual memory manager.
// //
// This client package takes care about WCFS isolation protocol details and // This client package takes care about WCFS isolation protocol details and
// provides to clients simple interface to isolated view of bigfile data on // provides to clients simple interface to isolated view of bigfile data on
...@@ -46,6 +46,31 @@ ...@@ -46,6 +46,31 @@
// to maintain X@at data view according to WCFS isolation protocol(*). // to maintain X@at data view according to WCFS isolation protocol(*).
// //
// //
// Integration with wendelin.core virtmem layer
//
// This client package can be used standalone, but additionally provides
// integration with wendelin.core userspace virtual memory manager: when a
// Mapping is created, it can be associated as serving base layer for a
// particular virtmem VMA via FileH.mmap(vma=...). In that case, since virtmem
// itself adds another layer of dirty pages over read-only base provided by
// Mapping(+)
//
// ┌──┐ ┌──┐
// │RW│ │RW│ ← virtmem VMA dirty pages
// └──┘ └──┘
// +
// VMA base = X@at view provided by Mapping:
//
// ___ /@revA/bigfile/X
// __ /@revB/bigfile/X
// _ /@revC/bigfile/X
// + ...
// ─── ───── ────────────────────────── ───── /head/bigfile/X
//
// the Mapping will interact with virtmem layer to coordinate
// updates to mapping virtual memory.
//
//
// API overview // API overview
// //
// - `WCFS` represents filesystem-level connection to wcfs server. // - `WCFS` represents filesystem-level connection to wcfs server.
...@@ -67,6 +92,8 @@ ...@@ -67,6 +92,8 @@
// -------- // --------
// //
// (*) see wcfs.go documentation for WCFS isolation protocol overview and details. // (*) see wcfs.go documentation for WCFS isolation protocol overview and details.
// (+) see bigfile_ops interface (wendelin/bigfile/file.h) that gives virtmem
// point of view on layering.
#ifndef _NXD_WCFS_H_ #ifndef _NXD_WCFS_H_
#define _NXD_WCFS_H_ #define _NXD_WCFS_H_
...@@ -79,6 +106,12 @@ ...@@ -79,6 +106,12 @@
#include <utility> #include <utility>
#include "wcfs_misc.h" #include "wcfs_misc.h"
#include <wendelin/bug.h>
// from wendelin/bigfile/virtmem.h
extern "C" {
struct VMA;
}
// wcfs:: // wcfs::
...@@ -230,7 +263,7 @@ public: ...@@ -230,7 +263,7 @@ public:
public: public:
error close(); error close();
pair<Mapping, error> mmap(int64_t blk_start, int64_t blk_len); pair<Mapping, error> mmap(int64_t blk_start, int64_t blk_len, VMA *vma=nil);
string String() const; string String() const;
error _open(); error _open();
...@@ -252,16 +285,18 @@ struct _Mapping : object { ...@@ -252,16 +285,18 @@ struct _Mapping : object {
// protected by fileh._mmapMu // protected by fileh._mmapMu
uint8_t *mem_start; // mmapped memory [mem_start, mem_stop) uint8_t *mem_start; // mmapped memory [mem_start, mem_stop)
uint8_t *mem_stop; uint8_t *mem_stop;
VMA *vma; // mmapped under this virtmem VMA | nil if created standalone from virtmem
bool efaulted; // y after mapping was switched to be invalid (gives SIGSEGV on access) bool efaulted; // y after mapping was switched to be invalid (gives SIGSEGV on access)
int64_t blk_stop() const { int64_t blk_stop() const {
if (!((mem_stop - mem_start) % fileh->blksize == 0)) ASSERT((mem_stop - mem_start) % fileh->blksize == 0);
panic("len(mmap) % fileh.blksize != 0");
return blk_start + (mem_stop - mem_start) / fileh->blksize; return blk_start + (mem_stop - mem_start) / fileh->blksize;
} }
error remmap_blk(int64_t blk); // for virtmem-only
error unmap(); error unmap();
void _assertVMAOk();
error _remmapblk(int64_t blk, zodb::Tid at); error _remmapblk(int64_t blk, zodb::Tid at);
error __remmapAsEfault(); error __remmapAsEfault();
error __remmapBlkAsEfault(int64_t blk); error __remmapBlkAsEfault(int64_t blk);
...@@ -270,7 +305,7 @@ struct _Mapping : object { ...@@ -270,7 +305,7 @@ struct _Mapping : object {
private: private:
_Mapping(); _Mapping();
~_Mapping(); ~_Mapping();
friend pair<Mapping, error> _FileH::mmap(int64_t blk_start, int64_t blk_len); friend pair<Mapping, error> _FileH::mmap(int64_t blk_start, int64_t blk_len, VMA *vma);
public: public:
void decref(); void decref();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment