/* Wendelin.bigfile | Low-level pagefault handler
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
 * the terms of any of the Open Source Initiative approved licenses and Convey
 * the resulting work. Corresponding source of such a combination shall include
 * the source code for all other software used.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
 *
 * ~~~~~~~~
 *
 * low-level pagefault handler entry from OS
 *
 * its job is to look up the vma which is being accessed, determine whether
 * the access is a read or a write, and tail to vma_on_pagefault().
 */

#include <wendelin/bigfile/virtmem.h>
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/ram.h>
#include <wendelin/bigfile/pagemap.h>
#include <wendelin/bug.h>

#include <signal.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>


/* "before us" previously installed SIGSEGV sigaction
 *
 * filled in by sigaction() in pagefault_init(); on_pagefault() tails to it
 * for faults it decides not to handle. */
static struct sigaction prev_segv_act;
/* !0 once pagefault_init() has installed on_pagefault() - protects against
 * installing the handler (and overwriting prev_segv_act) twice. */
static int    segv_act_installed;

/* forward declaration - used by on_pagefault(), defined below.
 *
 * NOTE(review): newer glibc releases appear to expose this type only as
 * `ucontext_t` (no `struct ucontext` tag) - confirm this still builds on the
 * target toolchain. */
static int faulted_by(const struct ucontext *uc);


/* SIGSEGV handler
 *
 * Entry point called by the OS; installed with SA_SIGINFO by pagefault_init().
 *
 * @sig     signal number - must be SIGSEGV
 * @si      signal details; ->si_signo, ->si_code and ->si_addr are used
 * @_uc     machine context at fault time (passed on to faulted_by())
 *
 * SEGV_ACCERR faults whose address is covered by a registered VMA are served
 * via vma_on_pagefault(), with errno preserved for the interrupted code.
 *
 * Everything else is tailed to the SIGSEGV action that was installed before
 * ours, or, if there was none, the process is made to die.
 */
static void on_pagefault(int sig, siginfo_t *si, void *_uc)
{
    /* NOTE since the counter is only touched under virtmem lock, one static
     * variable is enough, instead of several per-thread ones. */
    static int in_on_pagefault;

    struct ucontext *uc = _uc;
    unsigned write;
    VMA *vma;
    int save_errno;

    BUG_ON(sig != SIGSEGV);
    BUG_ON(si->si_signo != SIGSEGV);

    /* determine what client wants - read or write */
    write = faulted_by(uc);

    /* we'll try to only handle "invalid permissions" faults (= read of page
     * with PROT_NONE | write to page with PROT_READ only).
     *
     * "address not mapped" (SEGV_MAPERR) and possibly anything else (e.g.
     * SI_USER for signals sent by kill - not by kernel) could not result from
     * valid access to prepared file address space, so we don't handle those. */
    if (si->si_code != SEGV_ACCERR)
        goto dont_handle;

    /* save errno, before doing any library calls   XXX & the like ?
     * (so it can be restored before control goes back to interrupted code) */
    save_errno = errno;

    /* lock virtmem, so we can do further lookups / handling safely to
     * concurrent access / changes.
     *
     * NOTE it is ok to call e.g. pthread_mutex_lock() from synchronous signal
     *      handler.    */
    virt_lock();

    /* make sure we are not entering SIGSEGV handler recursively.
     *
     * we should not - double faulting from inside sighandler should just
     * coredump (see comments wrt SA_NODEFER in pagefault_init()), but anyway -
     * better check just in case. */
    BUG_ON(in_on_pagefault);
    ++in_on_pagefault;

    /* (1) addr -> vma  ;lookup VMA covering faulting memory address */
    vma = virt_lookup_vma(si->si_addr);
    if (!vma) {
        /* fault outside registered file slices */
        --in_on_pagefault;
        virt_unlock();

        /* the previous handler we tail to may return to the interrupted
         * code, so do not leak errno changes made by the calls above. */
        errno = save_errno;
        goto dont_handle;
    }

    /* now, since we found faulting address in registered memory areas, we know
     * we should serve this pagefault. */
    vma_on_pagefault(vma, (uintptr_t)si->si_addr, write);

    /* pagefault served - restore and return from sighandler */
    --in_on_pagefault;
    virt_unlock();
    errno = save_errno;

    return;


dont_handle:
    /* pagefault not resulted from correct access to file memory.
     * Crash if no previous SIGSEGV handler was set, or tail to that.   */
    if (prev_segv_act.sa_flags & SA_SIGINFO) {
        prev_segv_act.sa_sigaction(sig, si, _uc);
    }
    else if (prev_segv_act.sa_handler != SIG_DFL &&
             prev_segv_act.sa_handler != SIG_IGN /* yes, SIGSEGV can't be ignored */) {
        prev_segv_act.sa_handler(sig);
    }
    else {
        /* no previous SIGSEGV handler was set - re-trigger to die
         *
         * NOTE here SIGSEGV was set blocked in thread sigmask by kernel
         * when invoking signal handler (we explicitly did not specify
         * SA_NODEFER when setting it up).
         *
         * Re-access original memory location, and it will fault with
         * coredump directly without calling signal handler again.
         *
         * Do this only for faults reported by the kernel (si_code > 0):
         * for signals generated by a process (SI_USER & friends, si_code <= 0)
         * si_addr does not describe a faulting access, so memory must not be
         * touched - just BUG.  */
        if (si->si_code > 0) {
            // XXX how to know access size? we just proceed here with 1byte ...
            volatile uint8_t *p = (uint8_t *)si->si_addr;
            if (write)
                *p = *p;
            else
                *p;
        }

        /* could get here because ex. other thread remapped something in place
         * of old page, or the signal was sent by a process.
         * Die unconditionally */
        BUG();
    }
}


/* ensures pagefault handler for SIGSEGV is installed */
int pagefault_init(void)
{
    struct sigaction act;
    int err;

    /* protect from double sigaction installing. It is ok to be called twice. */
    if (segv_act_installed)
        goto done;

    act.sa_sigaction = on_pagefault;
    // |SA_RESTART(but does not work for read/write vs SIGSEGV?)
    /* NOTE no SA_ONSTACK - continue executing on the same stack
     * TODO stack overflow protection
     */
    /* NOTE we do not set SA_NODEFER. This means upon entry to signal handler,
     * SIGSEGV will be automatically blocked by kernel for faulting thread.
     *
     * This in particular means we'll get automatic protection from double
     * faults - in case handler or any other code it calls accesses memory
     * without appropriate protection prior set, the kernel will coredump.
     */
    act.sa_flags = SA_SIGINFO;

    /* do not want to block any other signals */
    err = sigemptyset(&act.sa_mask);
    if (err)
        return err;

    err = sigaction(SIGSEGV, &act, &prev_segv_act);
    if (err)
        return err;

    segv_act_installed = 1;
done:
    return 0;
}



/* tell whether the faulting access was a read or a write
 *
 * @uc      machine context the signal handler was invoked with
 * @return  0 - read        !0 - write
 *
 * Only x86 (32 and 64 bit) is implemented; other architectures fail to
 * compile.
 */
static int faulted_by(const struct ucontext *uc)
{
#if defined(__x86_64__) || defined(__i386__)
    /* mask applied to the pagefault error code saved in REG_ERR, see
     *
     * http://stackoverflow.com/questions/17671869/how-to-identify-read-or-write-operations-of-page-fault-when-using-sigaction-hand
     * http://wiki.osdev.org/Exceptions#Page_Fault
     */
    enum { PF_ERR_WRITE = 0x2 };

    return uc->uc_mcontext.gregs[REG_ERR] & PF_ERR_WRITE;
#else
# error TODO: implement read/write detection for pagefaults for your arch
#endif
}