Blame view

bigfile/pagefault.c 7.31 KB
Kirill Smelkov committed
1 2 3 4 5 6 7 8 9
/* Wendelin.bigfile | Low-level pagefault handler
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
Kirill Smelkov committed
10 11 12 13
 * the terms of any of the Free Software licenses or any of the Open Source
 * Initiative approved licenses and Convey the resulting work. Corresponding
 * source of such a combination shall include the source code for all other
 * software used.
Kirill Smelkov committed
14 15 16 17 18
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
Kirill Smelkov committed
19
 * See https://www.nexedi.com/licensing for rationale and options.
Kirill Smelkov committed
20 21 22 23 24 25 26 27 28
 *
 * ~~~~~~~~
 *
 * low-level pagefault handler entry from OS
 *
 * its job is to look up the vma covering the accessed address, determine
 * whether the access was a read or a write, and hand off to vma_on_pagefault().
 */

Kirill Smelkov committed
29 30 31 32
#include <wendelin/bigfile/virtmem.h>
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/ram.h>
#include <wendelin/bigfile/pagemap.h>
Kirill Smelkov committed
33 34 35
#include <wendelin/bug.h>

#include <signal.h>
Kirill Smelkov committed
36
#include <ucontext.h>
Kirill Smelkov committed
37 38 39 40 41 42 43 44 45
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>


/* "before us" previously installed SIGSEGV sigaction
 *
 * filled in by sigaction() in pagefault_init(); on_pagefault() chains to it
 * for faults it decides not to handle. */
static struct sigaction prev_segv_act;
/* !0 after pagefault_init() has installed on_pagefault() as SIGSEGV handler */
static int    segv_act_installed;

Kirill Smelkov committed
46
/* read/write detection for a fault; used by on_pagefault(), defined below */
static int faulted_by(const ucontext_t *uc);
Kirill Smelkov committed
47 48 49 50 51


/* SIGSEGV handler
 *
 * Serves pagefaults which result from access to registered file memory: the
 * faulting address is looked up among VMAs and, if one covers it, the fault
 * is passed to vma_on_pagefault() until that reports it as handled.
 *
 * Any other fault is not ours: it is passed on to the SIGSEGV handler that
 * was installed before us, or, if there was none, the process is crashed.
 *
 * @sig     signal number; must be SIGSEGV
 * @si      signal details; si_code and si_addr are used
 * @_uc     ucontext_t of the interrupted code
 */
static void on_pagefault(int sig, siginfo_t *si, void *_uc)
{
    ucontext_t *uc = _uc;
    unsigned write;
    VMA *vma;

    BUG_ON(sig != SIGSEGV);
    BUG_ON(si->si_signo != SIGSEGV);

    /* determine what client wants - read or write */
    write = faulted_by(uc);

    /* we'll try to only handle "invalid permissions" faults (= read of page
     * with PROT_NONE | write to page with PROT_READ only).
     *
     * "address not mapped" (SEGV_MAPERR) and possibly anything else (e.g.
     * SI_USER for signals sent by kill - not by kernel) could not result from
     * valid access to prepared file address space, so we don't handle those. */
    if (si->si_code != SEGV_ACCERR)
        goto dont_handle;

    /* save errno, before doing any library calls   XXX & the like ?
     * (it is restored on every path that leaves the locked section below) */
    int save_errno = errno;

    /* lock virtmem, so we can do further lookups / handling safely to
     * concurrent access / changes.
     *
     * NOTE it is ok to call e.g. pthread_mutex_lock() from synchronous signal
     *      handler.    */
    virt_lock();

    /* make sure we are not entering SIGSEGV handler recursively.
     *
     * we should not - double faulting from inside sighandler should just
     * coredump (see comments wrt SA_NODEFER in pagefault_init()), but anyway -
     * better check just in case.
     *
     * NOTE it is ok to use __thread in synchronous sighandler - even if TLS
     * block is allocated dynamically at runtime, we can overlap with such
     * allocation only if SIGSEGV happens in that original TLS allocation,
     * which should not happen, and thus it is already a bug somewhere in
     * thread data structures. */
    static __thread int in_on_pagefault;
    BUG_ON(in_on_pagefault);
    ++in_on_pagefault;

    /* vma_on_pagefault() can tell us to retry handling the fault, e.g. after a
     * page has been loaded. Loop until pagefault is handled */
    while (1) {
        VMFaultResult vmres;

        /* (1) addr -> vma  ;lookup VMA covering faulting memory address */
        vma = virt_lookup_vma(si->si_addr);
        if (!vma) {
            /* fault outside registered file slices - not ours.
             *
             * undo everything we did so far, including errno, so that the
             * previous handler (which may handle the fault and return to the
             * interrupted code) sees the state as it was on fault. */
            --in_on_pagefault;
            virt_unlock();
            errno = save_errno;
            goto dont_handle;
        }

        /* now, since we found faulting address in registered memory areas, we know
         * we should serve this pagefault. */
        vmres = vma_on_pagefault(vma, (uintptr_t)si->si_addr, write);

        /* see if pagefault handled or should be retried */
        if (vmres == VM_HANDLED)
            break;
        if (vmres == VM_RETRY)
            continue;
        BUG();  /* unreachable */
    }

    /* pagefault served - restore and return from sighandler */
    --in_on_pagefault;
    virt_unlock();
    errno = save_errno;

    return;


dont_handle:
    /* pagefault not resulted from correct access to file memory.
     * Crash if no previous SIGSEGV handler was set, or tail to that.   */
    if (prev_segv_act.sa_flags & SA_SIGINFO) {
        prev_segv_act.sa_sigaction(sig, si, _uc);
    }
    else if (prev_segv_act.sa_handler != SIG_DFL &&
             prev_segv_act.sa_handler != SIG_IGN /* yes, SIGSEGV can't be ignored */) {
        prev_segv_act.sa_handler(sig);
    }
    else {
        /* no previous SIGSEGV handler was set - die.
         *
         * si_code <= 0 means the signal was generated by a process (kill,
         * sigqueue, ...) and not by a memory access: si_addr carries no
         * fault address then, so do not touch memory - just BUG. */
        if (si->si_code <= 0)
            BUG();

        /* re-trigger the fault to die
         *
         * NOTE here SIGSEGV was set blocked in thread sigmask by kernel
         * when invoking signal handler (we explicitly did not specify
         * SA_NODEFER when setting it up).
         *
         * Re-access original memory location, and it will fault with
         * coredump directly without calling signal handler again.  */
        // XXX how to know access size? we just proceed here with 1byte ...
        volatile uint8_t *p = (uint8_t *)si->si_addr;
        if (write)
            *p = *p;
        else
            *p;

        /* could get here because ex. other thread remapped something in place
         * of old page. Die unconditionally */
        BUG();
    }
}


/* ensures pagefault handler for SIGSEGV is installed */
int pagefault_init(void)
{
    struct sigaction act;
    int err;

    /* protect from double sigaction installing. It is ok to be called twice. */
    if (segv_act_installed)
        goto done;

    act.sa_sigaction = on_pagefault;
    // |SA_RESTART(but does not work for read/write vs SIGSEGV?)
    /* NOTE no SA_ONSTACK - continue executing on the same stack
     * TODO stack overflow protection
     */
    /* NOTE we do not set SA_NODEFER. This means upon entry to signal handler,
     * SIGSEGV will be automatically blocked by kernel for faulting thread.
     *
     * This in particular means we'll get automatic protection from double
     * faults - in case handler or any other code it calls accesses memory
     * without appropriate protection prior set, the kernel will coredump.
     */
    act.sa_flags = SA_SIGINFO;

    /* do not want to block any other signals */
    err = sigemptyset(&act.sa_mask);
    if (err)
        return err;

    err = sigaction(SIGSEGV, &act, &prev_segv_act);
    if (err)
        return err;

    segv_act_installed = 1;
done:
    return 0;
}



/* faulted_by tells which kind of memory access caused the pagefault.
 *
 * @uc      context of the faulting code, as passed to the signal handler
 * @return  0 - read        !0 - write
 */
static int faulted_by(const ucontext_t *uc)
{
#if defined(__x86_64__) || defined(__i386__)
    /* the page-fault error code is available in REG_ERR; the bit tested
     * here is the one that is set for write access:
     *
     * http://stackoverflow.com/questions/17671869/how-to-identify-read-or-write-operations-of-page-fault-when-using-sigaction-hand
     * http://wiki.osdev.org/Exceptions#Page_Fault
     */
    enum { PF_ERR_WRITE = 0x2 };

    return (int)(uc->uc_mcontext.gregs[REG_ERR] & PF_ERR_WRITE);
#else
# error TODO: implement read/write detection for pagefaults for your arch
#endif
}