#ifndef _WENDELIN_BIGFILE_VIRTMEM_H_
#define _WENDELIN_BIGFILE_VIRTMEM_H_

/* Wendelin.bigfile | Virtual memory
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
 * the terms of any of the Open Source Initiative approved licenses and Convey
 * the resulting work. Corresponding source of such a combination shall include
 * the source code for all other software used.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
 *
 * ~~~~~~~~
 *
 * Virtual memory connects BigFile content and RAM pages into file memory
 * mappings.
 *
 * Read access to mapped pages causes their on-demand loading, and write access
 * marks modified pages as dirty. Dirty pages can then, on request, be either
 * written back to the file or discarded.
 */

#include <stddef.h>
#include <stdint.h>

#include <wendelin/list.h>
#include <wendelin/bigfile/types.h>
#include <wendelin/bigfile/pagemap.h>
#include <ccan/bitmap/bitmap.h> // XXX can't forward-decl for bitmap

/* Forward declarations.
 *
 * RAM, RAMH and BigFile are not defined in this header and are used here only
 * through pointers; Page is defined further below.
 */
typedef struct RAM      RAM;
typedef struct RAMH     RAMH;
typedef struct Page     Page;
typedef struct BigFile  BigFile;


/* BigFile Handle
 *
 * BigFile handle is a representation of file snapshot that could be locally
 * modified in-memory. The changes could be later either discarded or stored
 * back to file. One file can have many opened handles each with its own
 * modifications and optionally ram.
 */
struct BigFileH {
    BigFile *file;

    /* ram handle, backing this fileh mappings */
    RAMH    *ramh;

    /* fileh mappings (list of VMA)
     * NOTE current design assumes there will be not many mappings
     *      so instead of backpointers from pages to vma mapping entries, we'll
     *      scan all page->fileh->mmaps to overlap with page.
     */
    struct list_head mmaps; /* _ -> vma->same_fileh */
63

64 65
    /* {} f_pgoffset -> page */
    PageMap     pagemap;
66

67 68 69 70 71 72 73 74 75 76

    // XXX not sure we need this
    //     -> currently is used to know whether to join ZODB DataManager serving ZBigFile
    // XXX maybe change into dirty_list in the future?
    unsigned    dirty   : 1;
};
typedef struct BigFileH BigFileH;


/* Page - describes a fixed-size item of physical RAM associated with content from a fileh */
/* State of a page's content with respect to the file it belongs to. */
typedef enum PageState {
    /* file content has not been loaded yet */
    PAGE_EMPTY  = 0,

    /* file content has been loaded and was not modified */
    PAGE_LOADED = 1,

    /* file content has been loaded and was modified */
    PAGE_DIRTY  = 2,
} PageState;

struct Page {
    PageState   state;

87 88 89 90
    /* wrt fileh - associated with */
    BigFileH    *fileh;
    pgoff_t     f_pgoffset;

91 92 93 94 95 96 97 98 99 100 101 102
    /* wrt ram - associated with */
    RAMH*       ramh;
    pgoff_t     ramh_pgoffset;

    /* in recently-used pages for ramh->ram (ram->lru_list -> _) */
    struct list_head lru;

    int     refcnt; /* each mapping in a vma counts here */
};
typedef struct Page Page;



/* VMA - virtual memory area representing one fileh mapping
 *
 * NOTE areas must not overlap in virtual address space
 *      (in file space they may overlap).
 */
typedef struct VMA {
    /* virtual address range covered by this area: [addr_start, addr_stop) */
    uintptr_t   addr_start;
    uintptr_t   addr_stop;

    /* file side of the mapping */
    BigFileH    *fileh;         /* fileh this area maps */
    pgoff_t     f_pgoffset;     /* where the mapping starts, in pages */

    /* link in the list of all areas (virtmem.c::vma_list -> _)
     *
     * FIXME As approximation 0, VMA(s) are kept in a sorted doubly-linked
     * list. That gives O(n) lookup/add/remove, which is not good for
     * performance, but it is easy to program. It should be ok for a first
     * draft, as there are not many fileh views taken simultaneously.
     *
     * TODO use some binary-search-tree for better performance.
     */
    struct list_head virt_list;

    /* link in the list of areas of the same fileh (fileh->mmaps -> _) */
    struct list_head same_fileh;

    /* for page at (pgoffset - f_pgoffset): whether it is mapped in this VMA */
    bitmap      *page_ismappedv;    /* len ~ Δaddr / pagesize */
} VMA;


/*****************************
 *      API for clients      *
 *****************************/

/* open handle for a BigFile
 *
 * @fileh[out]  BigFileH handle to initialize for this open
 * @file        BigFile to open the handle for
 * @ram         RAM that will back created fileh mappings
 *
 * @return  0 - ok, !0 - fail
 */
int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);


/* close fileh
 *
 * @fileh   handle to close
 *
 * It is an error to call fileh_close() on a fileh that still has existing
 * mappings.
 */
void fileh_close(BigFileH *fileh);


/* map fileh part into memory
 *
 * This "maps" fileh part [pgoffset, pglen) in pages into process address space.
 * NOTE(review): the range is presumably pglen pages starting at pgoffset, i.e.
 *               [pgoffset, pgoffset + pglen) - confirm against the implementation.
 *
 * @vma[out]    vma to initialize for this mmap
 * @fileh       fileh whose part is being mapped
 * @pgoffset    where the mapped part starts, in pages
 * @pglen       presumably the length of the mapped part, in pages (see NOTE)
 * @return      0 - ok, !0 - fail
 */
int fileh_mmap(VMA *vma, BigFileH *fileh, pgoff_t pgoffset, pgoff_t pglen);


/* unmap mapping created by fileh_mmap()
 *
 * This removes mapping created by fileh_mmap() from process address space.
 * Changes made to fileh pages are preserved (e.g. for other mappings and for
 * later commit/discard).
 *
 * @vma     mapping to remove, as initialized by fileh_mmap()
 */
void vma_unmap(VMA *vma);


/* Actions to perform at writeout.
 *
 * Each action has its own bit, so that storing and marking-as-stored can be
 * requested either in the same call or in separate calls.
 */
enum WriteoutFlags {
    /* Store dirty pages back to file:
     *
     * - file.storeblk() is called for all dirty pages;
     * - state of the pages remains PAGE_DIRTY.
     *
     * To "finish" the storage, use WRITEOUT_MARKSTORED in the same or in a
     * separate call.
     */
    WRITEOUT_STORE          = 0x1,

    /* Mark dirty pages as successfully stored to file:
     *
     * state of the pages becomes PAGE_LOADED, and all mmaps are updated to
     * map the pages as R/O in order to track further writes.
     */
    WRITEOUT_MARKSTORED     = 0x2,
};

/* write changes made to fileh memory back to file
 *
 * Perform write-related actions according to flags (see WriteoutFlags).
 *
 * @fileh   handle whose changes are written out
 * @flags   which actions to perform; WRITEOUT_STORE and WRITEOUT_MARKSTORED
 *          may be requested in the same call or in separate calls
 *
 * @return  0 - ok      !0 - fail
 *          NOTE a call with only WRITEOUT_MARKSTORED can not fail.
 *
 * No guarantee is made about atomicity - e.g. if this call fails, some
 * pages could be written and some left in memory in dirty state.
 */
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);


/* discard changes made to fileh memory
 *
 * @fileh   handle whose dirty pages are discarded
 *
 * For each fileh dirty page:
 *
 *   - it is unmapped from all mmaps;
 *   - its content is discarded;
 *   - its backing memory is released to OS.
 */
void fileh_dirty_discard(BigFileH *fileh);


/* pagefault handler
 *
 * serves read/write access to protected memory: loads data from file on demand
 * and tracks which pages were made dirty.
 *
 * (clients call this indirectly via triggering SIGSEGV on read/write to memory)
 *
 * @vma     NOTE(review): presumably the mapping that contains addr - confirm
 * @addr    NOTE(review): presumably the address whose access faulted - confirm
 * @write   NOTE(review): presumably !0 for write access, 0 for read - confirm
 */
void vma_on_pagefault(VMA *vma, uintptr_t addr, int write);
/* NOTE(review): presumably sets up pagefault (SIGSEGV) handling; the meaning of
 * the return value is not documented here - confirm against pagefault.c. */
int pagefault_init(void);   /* in pagefault.c */


/* release some non-dirty ram back to OS; protect related mappings with PROT_NONE
 *
 * This should be called when system is low on memory - it will scan through
 * RAM pages and release some LRU non-dirty pages ram memory back to OS.
 *
 * (this is usually done automatically under memory pressure)
 *
 * @ram     RAM whose pages are scanned
 * @return  how many RAM pages were reclaimed
 * XXX int -> size_t ?
 */
int ram_reclaim(RAM *ram);



/************
 * Internal *
 ************/

/* mmap page memory into address space
 *
 * @page    page whose memory is mapped
 * @addr     NULL - mmap somewhere,    !NULL - mmap exactly there (MAP_FIXED)
 * @prot    NOTE(review): presumably PROT_* protection as for mmap(2) - confirm
 * @return  !NULL - mmapped ok there,   NULL - error
 *
 * NOTE to unmap memory either
 *
 *      - use usual munmap(2), or
 *      - mmap(2) something else in place of mmaped page memory.
 */
void *page_mmap(Page *page, void *addr, int prot);

/* page reference counting
 *
 * NOTE(review): presumably these increment / decrement page->refcnt, where
 * each mapping of the page in a vma counts as one reference - confirm
 * against the implementation.
 */
void page_incref(Page *page);
void page_decref(Page *page);


/* lookup VMA by addr */
VMA *virt_lookup_vma(void *addr);

/* NOTE(review): presumably add vma to / remove vma from the list of areas
 * that virt_lookup_vma() searches (see VMA.virt_list) - confirm against the
 * implementation. */
void virt_register_vma(VMA *vma);
void virt_unregister_vma(VMA *vma);

/* allocate virtual memory address space
 *
 * NOTE(review): parameter and return semantics are not documented here.
 * Presumably @addr is a placement request, @len is the size in bytes, and the
 * x-variant does not return on failure - confirm against the implementation.
 */
void *mem_valloc(void *addr, size_t len);
void *mem_xvalloc(void *addr, size_t len);

/* big virtmem lock
 *
 * virt_lock() takes the lock and virt_unlock() releases it.
 * Third parties can hook into the locking process via virt_lock_hookgil().
 */
void virt_lock(void);
void virt_unlock(void);

/* Hooks through which a third party can take part in locking the big virtmem
 * lock (e.g. python, to release/reacquire its GIL around it).
 */
typedef struct VirtGilHooks {
    /* drop the gil, if the current thread holds it */
    void *(*gil_ensure_unlocked)(void);

    /* retake the gil, if we were holding it at ->ensure_unlocked() stage */
    void (*gil_retake_if_waslocked)(void *);
} VirtGilHooks;

/* hook gilhooks into the process of locking the big virtmem lock
 * NOTE(review): when exactly the hooks are invoked, and whether gilhooks must
 * outlive the call, is not visible here - confirm against the implementation. */
void virt_lock_hookgil(const VirtGilHooks *gilhooks);


// XXX is this needed? think more
/* what happens on out-of-memory */
void OOM(void);

#endif