#ifndef _WENDELIN_BIGFILE_VIRTMEM_H_
#define _WENDELIN_BIGFILE_VIRTMEM_H_

/* Wendelin.bigfile | Virtual memory
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
 * the terms of any of the Open Source Initiative approved licenses and Convey
 * the resulting work. Corresponding source of such a combination shall include
 * the source code for all other software used.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
 *
 * ~~~~~~~~
 *
 * Virtual memory connects BigFile content and RAM pages into file memory
 * mappings.
 *
 * Read access to mapped pages causes their on-demand loading, and write access
 * marks modified pages as dirty. Dirty pages can then, on request, be either
 * written back to the file or discarded.
 */

#include <stddef.h>
#include <stdint.h>
#include <wendelin/list.h>
#include <wendelin/bigfile/types.h>
#include <wendelin/bigfile/pagemap.h>
#include <ccan/bitmap/bitmap.h> // XXX can't forward-decl for bitmap

/* Forward declarations of types that this header only refers to by pointer
 * (struct Page is defined further below). */
typedef struct RAM RAM;
typedef struct RAMH RAMH;
typedef struct Page Page;
typedef struct BigFile BigFile;


/* BigFile Handle
 *
 * BigFile handle is a representation of file snapshot that could be locally
 * modified in-memory. The changes could be later either discarded or stored
 * back to file. One file can have many opened handles each with its own
 * modifications and optionally ram.
 */
struct BigFileH {
    BigFile *file;

    /* ram handle, backing this fileh mappings */
    RAMH    *ramh;

    /* fileh mappings (list of VMA)
     * NOTE current design assumes there will be not many mappings
     *      so instead of backpointers from pages to vma mapping entries, we'll
     *      scan all page->fileh->mmaps to overlap with page.
     */
    struct list_head mmaps; /* _ -> vma->same_fileh */
63

64 65
    /* {} f_pgoffset -> page */
    PageMap     pagemap;
66

67 68 69 70 71 72 73 74 75 76

    // XXX not sure we need this
    //     -> currently is used to know whether to join ZODB DataManager serving ZBigFile
    // XXX maybe change into dirty_list in the future?
    unsigned    dirty   : 1;
};
typedef struct BigFileH BigFileH;


/* Page - describes fixed-size item of physical RAM associated with content from fileh */

/* PageState - state of a page's content relative to the file */
enum PageState {
    PAGE_EMPTY      = 0, /* file content has not been loaded yet */
    PAGE_LOADED     = 1, /* file content has     been loaded and was not modified */
    PAGE_DIRTY      = 2, /* file content has     been loaded and was     modified */
};
typedef enum PageState PageState;

struct Page {
    PageState   state;

87 88 89 90
    /* wrt fileh - associated with */
    BigFileH    *fileh;
    pgoff_t     f_pgoffset;

91 92 93 94 95 96 97 98 99 100 101 102
    /* wrt ram - associated with */
    RAMH*       ramh;
    pgoff_t     ramh_pgoffset;

    /* in recently-used pages for ramh->ram (ram->lru_list -> _) */
    struct list_head lru;

    int     refcnt; /* each mapping in a vma counts here */
};
typedef struct Page Page;




/* VMA - one mapping of a fileh into virtual memory
 *
 * NOTE VMAs must not overlap each other in virtual address space
 *      (in file space overlapping is allowed).
 */
struct VMA {
    /* mapped address range is [addr_start, addr_stop) */
    uintptr_t   addr_start;
    uintptr_t   addr_stop;

    /* which fileh is mapped, and from which file page the mapping starts */
    BigFileH    *fileh;
    pgoff_t     f_pgoffset;     /* in pages */

    /* entry in the list of all VMAs (virtmem.c::vma_list -> _)
     *
     * FIXME As approximation 0 the VMAs live in a sorted doubly-linked list.
     * That is easy to program but gives O(n) lookup/add/remove. It should do
     * for a first draft, because not many fileh views are expected to be
     * taken simultaneously.
     *
     * TODO switch to some binary-search-tree for better performance.
     */
    struct list_head virt_list;

    /* entry in the list of VMAs of the same fileh (fileh->mmaps -> _) */
    struct list_head same_fileh;

    /* per-page flags: whether the page at (pgoffset - f_pgoffset) is mapped
     * in this VMA; length ~ (addr_stop - addr_start) / pagesize */
    bitmap      *page_ismappedv;
};
typedef struct VMA VMA;


/*****************************
 *      API for clients      *
 *****************************/

/* open handle for a BigFile
 *
 * Initializes the caller-provided fileh as a new handle onto file.
 *
 * @fileh[out]  BigFileH handle to initialize for this open
 * @file        BigFile the handle is opened for
 * @ram         RAM that will back created fileh mappings
 *
 * @return  0 - ok, !0 - fail
 */
int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);


/* close fileh
 *
 * @fileh   handle to close
 *
 * It is an error to call fileh_close() on a handle that still has existing
 * mappings.
 */
void fileh_close(BigFileH *fileh);


/* map fileh part into memory
 *
 * This "maps" a part of fileh, given by pgoffset and pglen (both in pages),
 * into process address space.
 *
 * NOTE(review): going by the parameter names the mapped file range is
 *      presumably [pgoffset, pgoffset + pglen) - confirm against virtmem.c.
 *
 * @vma[out]    vma to initialize for this mmap
 * @fileh       handle whose content is mapped
 * @pgoffset    where the mapped part starts, in pages
 * @pglen       size of the mapped part, in pages
 * @return      0 - ok, !0 - fail
 */
int fileh_mmap(VMA *vma, BigFileH *fileh, pgoff_t pgoffset, pgoff_t pglen);


/* unmap mapping created by fileh_mmap()
 *
 * This removes mapping created by fileh_mmap() from process address space.
 * Changes made to fileh pages are preserved, so they stay available e.g. to
 * other mappings and to a later commit/discard.
 *
 * @vma     mapping to remove
 */
void vma_unmap(VMA *vma);


/* Flags selecting what a writeout does; each flag is a separate bit. */
enum WriteoutFlags {
    /* Store dirty pages back to file:
     *
     * - file.storeblk() is called for every dirty page;
     * - the pages stay in PAGE_DIRTY state.
     *
     * The storage is "finished" by WRITEOUT_MARKSTORED, passed either in the
     * same call or in a separate one.
     */
    WRITEOUT_STORE          = 0x1,

    /* Mark dirty pages as successfully stored to file:
     *
     * the pages switch to PAGE_LOADED state, and all mmaps are updated to map
     * them R/O so that further writes are tracked.
     */
    WRITEOUT_MARKSTORED     = 0x2
};

/* write changes made to fileh memory back to file
 *
 * Perform write-related actions according to flags (see WriteoutFlags).
 *
 * @fileh   handle whose dirty pages are processed
 * @flags   WriteoutFlags bits; WRITEOUT_STORE and WRITEOUT_MARKSTORED may be
 *          given in the same call or in separate calls
 *
 * @return  0 - ok      !0 - fail
 *          NOTE a call with only WRITEOUT_MARKSTORED set can not fail.
 *
 * No guarantee is made about atomicity - e.g. if this call fails, some
 * pages could be written and some left in memory in dirty state.
 */
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);


/* discard changes made to fileh memory
 *
 * @fileh   handle whose dirty pages are discarded
 *
 * For each fileh dirty page:
 *
 *   - it is unmapped from all mmaps;
 *   - its content is discarded;
 *   - its backing memory is released to OS.
 */
void fileh_dirty_discard(BigFileH *fileh);


/* pagefault handler
 *
 * serves read/write access to protected memory: loads data from file on demand
 * and tracks which pages were made dirty.
 *
 * (clients call this indirectly via triggering SIGSEGV on read/write to memory)
 *
 * NOTE(review): parameter meanings below are read from their names - confirm
 *      against virtmem.c / pagefault.c:
 *
 * @vma     mapping the faulting address belongs to
 * @addr    address whose access faulted
 * @write   presumably !0 for a write access and 0 for a read access
 */
void vma_on_pagefault(VMA *vma, uintptr_t addr, int write);
/* NOTE(review): presumably sets up the fault handling that ends up calling
 * vma_on_pagefault(); return convention not visible here - confirm */
int pagefault_init(void);   /* in pagefault.c */


/* release some non-dirty ram back to OS; protect PROT_NONE related mappings
 *
 * This should be called when system is low on memory - it will scan through
 * RAM pages and release some LRU non-dirty pages ram memory back to OS.
 *
 * (this is usually done automatically under memory pressure)
 *
 * @ram     RAM to reclaim pages from
 *
 * @return  how many RAM pages were reclaimed
 * XXX int -> size_t ?
 */
int ram_reclaim(RAM *ram);



/************
 * Internal *
 ************/

/* mmap page memory into address space
 *
 * @page     page whose memory is mapped
 * @addr     NULL - mmap somewhere,    !NULL - mmap exactly there (MAP_FIXED)
 * @prot     NOTE(review): presumably mmap(2) PROT_* protection bits - confirm
 * @return  !NULL - mmapped ok there,   NULL - error
 *
 * NOTE to unmap memory either
 *
 *      - use usual munmap(2), or
 *      - mmap(2) something else in place of mmaped page memory.
 */
void *page_mmap(Page *page, void *addr, int prot);

/* take / drop a reference on a page
 *
 * NOTE(review): presumably these adjust page->refcnt, which counts each
 *      mapping of the page in a vma - confirm against virtmem.c. */
void page_incref(Page *page);
void page_decref(Page *page);


/* lookup VMA by addr
 *
 * NOTE(review): result for an address not covered by any VMA is presumably
 *      NULL - confirm against virtmem.c. */
VMA *virt_lookup_vma(void *addr);

/* add vma to / remove vma from the set of VMAs known to virtmem
 *
 * NOTE(review): presumably this is the virtmem.c::vma_list that
 *      VMA.virt_list links into - confirm. */
void virt_register_vma(VMA *vma);
void virt_unregister_vma(VMA *vma);

/* allocate virtual memory address space
 *
 * @addr    NOTE(review): presumably a placement request as for mmap(2) - confirm
 * @len     size of the address range to allocate, in bytes
 *
 * NOTE(review): mem_xvalloc() is presumably the variant that does not return
 *      on failure instead of returning NULL - confirm against the
 *      implementation.
 */
void *mem_valloc(void *addr, size_t len);
void *mem_xvalloc(void *addr, size_t len);

/* big virtmem lock
 *
 * virt_lock() takes it and virt_unlock() releases it. */
void virt_lock(void);
void virt_unlock(void);

/* Hooks that let third-party code take part in the process of locking the big
 * virtmem lock (e.g. python hooks in here to release/reacquire its GIL). */
typedef struct VirtGilHooks {
    /* drop the gil if the current thread holds it; the returned pointer is
     * what gil_retake_if_waslocked() later receives */
    void *(*gil_ensure_unlocked)(void);

    /* retake the gil if it was held at the ->gil_ensure_unlocked() stage */
    void (*gil_retake_if_waslocked)(void *);
} VirtGilHooks;

/* install gilhooks as the hooks used around taking the big virtmem lock */
void virt_lock_hookgil(const VirtGilHooks *gilhooks);


// XXX is this needed? think more
/* what happens on out-of-memory
 *
 * NOTE(review): presumably called when a memory allocation fails and not
 *      expected to return - confirm against the implementation. */
void OOM(void);


#endif