_pickle.c 181 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
#include "Python.h"
#include "structmember.h"

PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");

/* Pickle protocol versions known to this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* largest protocol number accepted */
    DEFAULT_PROTOCOL = 3    /* used when the caller passes no protocol */
};

/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.  Each opcode is the single byte
   written to / read from the pickle stream. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C'
};

/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each value is a complete INT ('I') record, newline included.  The
 * #undef lines discard any TRUE/FALSE macro that is already defined.
 */
#undef TRUE
#define TRUE  "I01\n"
#undef FALSE
#define FALSE "I00\n"

/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16
};

/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;

/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
138 139 140 141 142 143 144 145 146 147

/* Raise UnpicklingError for an attempt to pop more items than the
   unpickling stack holds.  Always returns -1 so callers can write
   `return stack_underflow();`. */
static int
stack_underflow(void)
{
    const int failure = -1;

    PyErr_SetString(UnpicklingError, "unpickling stack underflow");
    return failure;
}

/* Internal data type used as the unpickling stack. */
typedef struct {
148
    PyObject_VAR_HEAD
149
    PyObject **data;
150
    Py_ssize_t allocated;  /* number of slots in data allocated */
151 152 153 154 155
} Pdata;

/* Deallocator for Pdata: release the reference held for every item still on
   the stack, then free the slot array and the object itself. */
static void
Pdata_dealloc(Pdata *self)
{
    /* Py_SIZE() yields a Py_ssize_t; an int index would truncate it for
       stacks holding more than INT_MAX items. */
    Py_ssize_t i = Py_SIZE(self);

    while (--i >= 0) {
        Py_DECREF(self->data[i]);
    }
    PyMem_FREE(self->data);
    PyObject_Del(self);
}

/* Type object for Pdata.  Only the name, the instance size and the
   deallocator are filled in; every other slot is left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};

static PyObject *
Pdata_New(void)
{
    Pdata *self;

    if (!(self = PyObject_New(Pdata, &Pdata_Type)))
        return NULL;
179 180 181
    Py_SIZE(self) = 0;
    self->allocated = 8;
    self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
182 183 184 185 186 187 188 189 190 191 192 193 194
    if (self->data)
        return (PyObject *)self;
    Py_DECREF(self);
    return PyErr_NoMemory();
}


/* Retain only the initial clearto items.  If clearto >= the current
 * number of items, this is a (non-erroneous) NOP.
 */
static int
Pdata_clear(Pdata *self, int clearto)
{
195
    int i = Py_SIZE(self);
196 197 198

    if (clearto < 0)
        return stack_underflow();
199
    if (clearto >= i)
200 201
        return 0;

202 203
    while (--i >= clearto) {
        Py_CLEAR(self->data[i]);
204
    }
205
    Py_SIZE(self) = clearto;
206 207 208 209 210 211
    return 0;
}

/* Enlarge the slot array of the stack by one eighth of its current capacity
   plus 6 slots.  Returns 0 on success; on overflow or allocation failure
   sets MemoryError and returns -1, leaving the stack unchanged. */
static int
Pdata_grow(Pdata *self)
{
    PyObject **data = self->data;
    Py_ssize_t allocated = self->allocated;
    Py_ssize_t new_allocated;

    new_allocated = (allocated >> 3) + 6;
    /* check for integer overflow */
    if (new_allocated > PY_SSIZE_T_MAX - allocated)
        goto nomemory;
    new_allocated += allocated;
    if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
        goto nomemory;
    /* The result goes into the local `data` only, so self->data stays
       valid if the reallocation fails. */
    data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
    if (data == NULL)
        goto nomemory;

    self->data = data;
    self->allocated = new_allocated;
    return 0;

  nomemory:
    PyErr_NoMemory();
    return -1;
}

/* Pop the topmost element of the stack and return it; the reference that
 * the stack owned is handed over to the caller.  On an empty stack,
 * UnpicklingError is raised and NULL is returned.
 */
static PyObject *
Pdata_pop(Pdata *self)
{
    if (Py_SIZE(self) == 0) {
        PyErr_SetString(UnpicklingError, "bad pickle data");
        return NULL;
    }
    return self->data[--Py_SIZE(self)];
}

/* D is a Pdata*.  Pop the topmost element and store it into V, which
 * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
 * is raised and V is set to NULL.
 */
#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)

/* Store obj on top of the stack, growing the slot array first when it is
   full.  No reference is added: the stack takes over the caller's reference.
   Returns 0 on success, -1 (MemoryError set by Pdata_grow) on failure, in
   which case obj is not stored. */
static int
Pdata_push(Pdata *self, PyObject *obj)
{
    if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
        return -1;
    }
    self->data[Py_SIZE(self)++] = obj;
    return 0;
}

/* Push an object on stack, transferring its ownership to the stack.
   Returns ER from the enclosing function if the push fails. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference taken here is dropped again before
   returning ER from the enclosing function, so a failed append does not
   leak O. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)

/* Remove every item from index `start` to the top of the stack and return
   them, in stack order, as a new tuple.  The references owned by the stack
   are moved into the tuple.  Returns NULL (stack untouched) if the tuple
   cannot be created. */
static PyObject *
Pdata_poptuple(Pdata *self, Py_ssize_t start)
{
    PyObject *tuple;
    Py_ssize_t len, i, j;

    len = Py_SIZE(self) - start;
    tuple = PyTuple_New(len);
    if (tuple == NULL)
        return NULL;
    for (i = start, j = 0; j < len; i++, j++)
        PyTuple_SET_ITEM(tuple, j, self->data[i]);

    Py_SIZE(self) = start;
    return tuple;
}

/* Remove every item from index `start` to the top of the stack and return
   them, in stack order, as a new list.  The references owned by the stack
   are moved into the list.  Returns NULL (stack untouched) if the list
   cannot be created. */
static PyObject *
Pdata_poplist(Pdata *self, Py_ssize_t start)
{
    PyObject *list;
    Py_ssize_t len, i, j;

    len = Py_SIZE(self) - start;
    list = PyList_New(len);
    if (list == NULL)
        return NULL;
    for (i = start, j = 0; j < len; i++, j++)
        PyList_SET_ITEM(list, j, self->data[i]);

    Py_SIZE(self) = start;
    return list;
}

304 305 306 307 308 309 310 311 312 313 314 315
/* One slot of the pickler memo hashtable. */
typedef struct {
    PyObject *me_key;   /* memoized object (owned reference); NULL if the
                           slot is unused */
    long me_value;      /* value stored for me_key */
} PyMemoEntry;

/* Open-addressing hashtable keyed by object identity (pointer value). */
typedef struct {
    Py_ssize_t mt_mask;         /* mt_allocated - 1; used to wrap indices */
    Py_ssize_t mt_used;         /* number of occupied slots */
    Py_ssize_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;      /* the slot array */
} PyMemoTable;

316 317
typedef struct PicklerObject {
    PyObject_HEAD
318
    PyMemoTable *memo;          /* Memo table, keep track of the seen
319
                                   objects to support self-referential objects
320
                                   pickling. */
321 322
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;
323 324 325 326 327 328

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
329 330 331 332 333 334 335 336 337 338
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disable the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used if with self-referential
                                   objects. */
    int fast_nesting;
339 340
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
341 342 343 344 345 346
    PyObject *fast_memo;
} PicklerObject;

/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    Py_ssize_t memo_size;

    PyObject *arg;              /* Cached 1-tuple reused by
                                   _Unpickler_FastCall(), can be NULL */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */

    Py_buffer buffer;           /* Buffer view on the current input object */
    char *input_buffer;         /* Start of the input data (buffer.buf) */
    char *input_line;
    Py_ssize_t input_len;       /* Number of bytes in input_buffer */
    Py_ssize_t next_read_idx;   /* Index of the next byte to read */
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;

/* Forward declarations of functions and type objects that are referenced
   before their definitions appear later in this file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;


388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
/*************************************************************************
 A custom hashtable mapping void* to longs. This is used by the pickler for
 memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created table; table sizes are powers of two. */
#define MT_MINSIZE 8
/* Bits shifted out of the perturbation value on each probing step. */
#define PERTURB_SHIFT 5


static PyMemoTable *
PyMemoTable_New(void)
{
    PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
    if (memo == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    memo->mt_used = 0;
    memo->mt_allocated = MT_MINSIZE;
    memo->mt_mask = MT_MINSIZE - 1;
    memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
    if (memo->mt_table == NULL) {
        PyMem_FREE(memo);
        PyErr_NoMemory();
        return NULL;
    }
    memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));

    return memo;
}

/* Return a new memo table with the same size and entries as `self`.
   Every key present in the copy receives an additional reference.
   Returns NULL with MemoryError set if an allocation fails. */
static PyMemoTable *
PyMemoTable_Copy(PyMemoTable *self)
{
    Py_ssize_t i;
    PyMemoTable *new = PyMemoTable_New();
    if (new == NULL)
        return NULL;

    new->mt_used = self->mt_used;
    new->mt_allocated = self->mt_allocated;
    new->mt_mask = self->mt_mask;
    /* The table we get from _New() is probably smaller than we wanted.
       Free it and allocate one that's the right size. */
    PyMem_FREE(new->mt_table);
    new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
    if (new->mt_table == NULL) {
        PyMem_FREE(new);
        /* PyMem_MALLOC does not set an exception by itself; without this
           the caller would see NULL with no error indicator set. */
        PyErr_NoMemory();
        return NULL;
    }
    /* The copy shares the key objects, so it needs its own references. */
    for (i = 0; i < self->mt_allocated; i++) {
        Py_XINCREF(self->mt_table[i].me_key);
    }
    memcpy(new->mt_table, self->mt_table,
           sizeof(PyMemoEntry) * self->mt_allocated);

    return new;
}

/* Number of entries currently stored in the memo table. */
static Py_ssize_t
PyMemoTable_Size(PyMemoTable *self)
{
    const Py_ssize_t live_entries = self->mt_used;

    return live_entries;
}

/* Remove every entry from the table, releasing the reference held on each
   key.  The slot array keeps its current size.  Always returns 0. */
static int
PyMemoTable_Clear(PyMemoTable *self)
{
    Py_ssize_t slot;

    for (slot = self->mt_allocated - 1; slot >= 0; slot--) {
        Py_XDECREF(self->mt_table[slot].me_key);
    }
    memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
    self->mt_used = 0;
    return 0;
}

/* Destroy a memo table: drop all key references, then free the slot array
   and the table header.  Passing NULL is allowed and does nothing. */
static void
PyMemoTable_Del(PyMemoTable *self)
{
    if (self != NULL) {
        PyMemoTable_Clear(self);

        PyMem_FREE(self->mt_table);
        PyMem_FREE(self);
    }
}

/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
   can be considerably simpler than dictobject.c's lookdict().

   Returns the slot that either already holds `key` or is the unused slot
   (me_key == NULL) where `key` would be inserted.  The loop only ends when
   such a slot is found, so the table must always contain a free slot. */
static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
{
    size_t i;
    size_t perturb;
    size_t mask = (size_t)self->mt_mask;
    PyMemoEntry *table = self->mt_table;
    PyMemoEntry *entry;
    /* The key's address, shifted right by 3 bits, serves as the hash. */
    Py_hash_t hash = (Py_hash_t)key >> 3;

    i = hash & mask;
    entry = &table[i];
    if (entry->me_key == NULL || entry->me_key == key)
        return entry;

    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        entry = &table[i & mask];
        if (entry->me_key == NULL || entry->me_key == key)
            return entry;
    }
    assert(0);  /* Never reached */
    return NULL;
}

/* Rebuild the table with the smallest power-of-two size >= min_size (and
   at least MT_MINSIZE), re-inserting every existing entry.

   Returns -1 on failure, 0 on success.  On failure MemoryError is set and
   the table is left exactly as it was, so it can still be used, cleared and
   freed safely. */
static int
_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
{
    PyMemoEntry *oldtable;
    PyMemoEntry *newtable;
    PyMemoEntry *oldentry, *newentry;
    Py_ssize_t new_size = MT_MINSIZE;
    Py_ssize_t to_process;

    assert(min_size > 0);

    /* Find the smallest valid table size >= min_size.  Check before
       doubling so the signed shift can never overflow. */
    while (new_size < min_size) {
        if (new_size > PY_SSIZE_T_MAX / 2) {
            PyErr_NoMemory();
            return -1;
        }
        new_size <<= 1;
    }
    /* new_size needs to be a power of two. */
    assert((new_size & (new_size - 1)) == 0);

    /* Guard the byte-size multiplication below against overflow. */
    if ((size_t)new_size > (size_t)PY_SSIZE_T_MAX / sizeof(PyMemoEntry)) {
        PyErr_NoMemory();
        return -1;
    }

    /* Allocate new table.  Nothing in `self` is modified until this has
       succeeded; freeing the old table here would leave mt_table dangling
       while mt_allocated still described it. */
    newtable = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
    if (newtable == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    memset(newtable, 0, sizeof(PyMemoEntry) * new_size);

    oldtable = self->mt_table;
    self->mt_table = newtable;
    self->mt_allocated = new_size;
    self->mt_mask = new_size - 1;

    /* Copy entries from the old table. */
    to_process = self->mt_used;
    for (oldentry = oldtable; to_process > 0; oldentry++) {
        if (oldentry->me_key != NULL) {
            to_process--;
            /* newentry is a pointer to a chunk of the new
               mt_table, so we're setting the key:value pair
               in-place. */
            newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
            newentry->me_key = oldentry->me_key;
            newentry->me_value = oldentry->me_value;
        }
    }

    /* Deallocate the old table. */
    PyMem_FREE(oldtable);
    return 0;
}

/* Look up `key` in the memo table.  Returns a pointer to the stored value,
   or NULL when the key is not present (no exception is set). */
static long *
PyMemoTable_Get(PyMemoTable *self, PyObject *key)
{
    PyMemoEntry *slot = _PyMemoTable_Lookup(self, key);

    return (slot->me_key != NULL) ? &slot->me_value : NULL;
}

/* Associate `value` with `key`, overwriting any previous value.  A key that
   is new to the table gets an additional reference.
   Returns -1 on failure, 0 on success. */
static int
PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
{
    PyMemoEntry *slot;
    Py_ssize_t growth;

    assert(key != NULL);

    slot = _PyMemoTable_Lookup(self, key);
    if (slot->me_key == NULL) {
        /* Fresh slot: take ownership of a reference to the key. */
        Py_INCREF(key);
        slot->me_key = key;
        slot->me_value = value;
        self->mt_used++;

        /* Only an insertion can push the table over its load limit.
         * If used >= 2/3 size, adjust size. Normally, this quadruples
         * the size.
         *
         * Quadrupling the size improves average table sparseness
         * (reducing collisions) at the cost of some memory. It also halves
         * the number of expensive resize operations in a growing memo
         * table.
         *
         * Very large memo tables (over 50K items) use doubling instead.
         * This may help applications with severe memory constraints.
         */
        if (self->mt_used * 3 >= (self->mt_mask + 1) * 2) {
            growth = (self->mt_used > 50000) ? 2 : 4;
            return _PyMemoTable_ResizeTable(self, growth * self->mt_used);
        }
        return 0;
    }

    /* Key already present: just replace the value. */
    slot->me_value = value;
    return 0;
}

#undef MT_MINSIZE
#undef PERTURB_SHIFT

/*************************************************************************/

607
/* Helpers for creating the argument tuple passed to functions. This has the
   performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647
/* ARG_TUP(self, obj): make (self)->arg a 1-tuple whose only item is obj.
   The cached tuple is reused when present, otherwise a new one is created;
   whatever item 0 held before is released and obj is stored without an
   extra reference being taken.  If the tuple cannot be created, obj is
   released and (self)->arg stays NULL. */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* FREE_ARG_TUP(self): if something other than (self)->arg still references
   the cached tuple, give it up (set (self)->arg to NULL) so that it is not
   modified by the next ARG_TUP; otherwise keep it for reuse. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)

/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provides any real performance benefits?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e, call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either ways, the loading time a pickle stream
   large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead, than the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'. */
649
static PyObject *
650
_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
651 652 653 654 655 656 657 658 659 660 661
{
    PyObject *result = NULL;

    ARG_TUP(self, arg);
    if (self->arg) {
        result = PyObject_Call(func, self->arg, NULL);
        FREE_ARG_TUP(self);
    }
    return result;
}

662 663 664 665 666 667 668 669 670 671 672 673
/* Discard the current output buffer and replace it with a fresh bytes
   object of max_output_len bytes, resetting output_len to 0.
   Returns 0 on success, -1 if the new buffer cannot be allocated (in which
   case output_buffer is NULL and output_len is unchanged). */
static int
_Pickler_ClearBuffer(PicklerObject *self)
{
    PyObject *fresh;

    Py_CLEAR(self->output_buffer);
    fresh = PyBytes_FromStringAndSize(NULL, self->max_output_len);
    self->output_buffer = fresh;
    if (fresh == NULL)
        return -1;

    self->output_len = 0;
    return 0;
}

674
static PyObject *
675
_Pickler_GetString(PicklerObject *self)
676
{
677
    PyObject *output_buffer = self->output_buffer;
678

679 680 681 682 683 684
    assert(self->output_buffer != NULL);
    self->output_buffer = NULL;
    /* Resize down to exact size */
    if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
        return NULL;
    return output_buffer;
685 686
}

687 688
/* Hand the buffered pickle data to the stream's write() method.  The
   output buffer is detached by _Pickler_GetString(), so self->output_buffer
   is NULL afterwards and must be recreated before further writes.
   Returns 0 on success, -1 with an exception set on failure. */
static int
_Pickler_FlushToFile(PicklerObject *self)
{
    PyObject *output, *result;

    assert(self->write != NULL);

    output = _Pickler_GetString(self);
    if (output == NULL)
        return -1;

    /* _Pickler_FastCall() consumes the reference to `output`. */
    result = _Pickler_FastCall(self, self->write, output);
    Py_XDECREF(result);
    return (result == NULL) ? -1 : 0;
}

/* Append the `n` bytes at `s` to the pickler's output.

   Data is normally copied into self->output_buffer, which is enlarged as
   needed.  When pickling to a stream (self->write != NULL) and the buffer
   would exceed MAX_WRITE_BUF_SIZE, the buffered data is flushed first; a
   single chunk larger than MAX_WRITE_BUF_SIZE is then passed directly to
   write() without being buffered.

   Returns -1 with an exception set on failure.  On success returns n, or 0
   when the chunk was written straight to the stream. */
static int
_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
{
    Py_ssize_t i, required;
    char *buffer;

    assert(s != NULL);

    required = self->output_len + n;
    if (required > self->max_output_len) {
        if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
            /* XXX This reallocates a new buffer every time, which is a bit
               wasteful. */
            if (_Pickler_FlushToFile(self) < 0)
                return -1;
            if (_Pickler_ClearBuffer(self) < 0)
                return -1;
        }
        if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
            /* we already flushed above, so the buffer is empty */
            PyObject *result;
            /* XXX we could spare an intermediate copy and pass
               a memoryview instead */
            PyObject *output = PyBytes_FromStringAndSize(s, n);
            /* Check the freshly created object, not `s`: a NULL `output`
               must never reach the argument tuple. */
            if (output == NULL)
                return -1;
            result = _Pickler_FastCall(self, self->write, output);
            Py_XDECREF(result);
            return (result == NULL) ? -1 : 0;
        }
        else {
            /* Refuse sizes whose doubling would overflow Py_ssize_t. */
            if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
                PyErr_NoMemory();
                return -1;
            }
            self->max_output_len = (self->output_len + n) * 2;
            if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
                return -1;
        }
    }
    buffer = PyBytes_AS_STRING(self->output_buffer);
    if (n < 8) {
        /* This is faster than memcpy when the string is short. */
        for (i = 0; i < n; i++) {
            buffer[self->output_len + i] = s[i];
        }
    }
    else {
        memcpy(buffer + self->output_len, s, n);
    }
    self->output_len += n;
    return n;
}

static PicklerObject *
_Pickler_New(void)
{
    PicklerObject *self;

    self = PyObject_GC_New(PicklerObject, &Pickler_Type);
    if (self == NULL)
        return NULL;

    self->pers_func = NULL;
    self->arg = NULL;
    self->write = NULL;
    self->proto = 0;
    self->bin = 0;
    self->fast = 0;
    self->fast_nesting = 0;
    self->fix_imports = 0;
    self->fast_memo = NULL;

    self->memo = PyMemoTable_New();
    if (self->memo == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    self->max_output_len = WRITE_BUF_SIZE;
    self->output_len = 0;
    self->output_buffer = PyBytes_FromStringAndSize(NULL,
                                                    self->max_output_len);
    if (self->output_buffer == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    return self;
}

/* Configure the protocol-related fields of the pickler.

   proto_obj: NULL or None selects DEFAULT_PROTOCOL; otherwise it is
       converted with PyLong_AsLong().  A negative value selects
       HIGHEST_PROTOCOL; a value above HIGHEST_PROTOCOL raises ValueError.
   fix_imports_obj: evaluated for truth; the resulting flag is only kept
       for protocols below 3.

   Returns 0 on success, -1 with an exception set on failure. */
static int
_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
                     PyObject *fix_imports_obj)
{
    long proto = 0;
    int fix_imports;

    if (proto_obj == NULL || proto_obj == Py_None)
        proto = DEFAULT_PROTOCOL;
    else {
        proto = PyLong_AsLong(proto_obj);
        /* -1 is also a legitimate value; only an exception means failure. */
        if (proto == -1 && PyErr_Occurred())
            return -1;
    }
    if (proto < 0)
        proto = HIGHEST_PROTOCOL;
    if (proto > HIGHEST_PROTOCOL) {
        PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
                     HIGHEST_PROTOCOL);
        return -1;
    }
    fix_imports = PyObject_IsTrue(fix_imports_obj);
    if (fix_imports == -1)
        return -1;

    self->proto = proto;
    self->bin = proto > 0;
    self->fix_imports = fix_imports && proto < 3;

    return 0;
}

824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
/* Bind the pickler to an output stream by storing the stream's write()
   method in self->write.  A missing attribute is reported as TypeError;
   any other lookup error is propagated unchanged.
   Returns -1 (with an exception set) on failure, 0 on success. This may
   be called once on a freshly created Pickler. */
static int
_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
{
    PyObject *write_method;

    assert(file != NULL);

    write_method = PyObject_GetAttrString(file, "write");
    self->write = write_method;
    if (write_method != NULL)
        return 0;

    if (PyErr_ExceptionMatches(PyExc_AttributeError))
        PyErr_SetString(PyExc_TypeError,
                        "file must have a 'write' attribute");
    return -1;
}

/* Unpickler counterpart of _Pickler_FastCall(): call func(arg) through the
   unpickler's cached argument tuple.  The reference to `arg` is consumed;
   returns the call's result or NULL with an exception set. */
static PyObject *
_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
{
    PyObject *outcome;

    ARG_TUP(self, arg);
    if (self->arg == NULL)
        return NULL;

    outcome = PyObject_Call(func, self->arg, NULL);
    FREE_ARG_TUP(self);
    return outcome;
}
854

855 856
/* Returns the size of the input on success, -1 on failure. This takes its
   own reference to `input`. */
857
static Py_ssize_t
858 859 860 861 862 863 864 865 866
_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
{
    if (self->buffer.buf != NULL)
        PyBuffer_Release(&self->buffer);
    if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
        return -1;
    self->input_buffer = self->buffer.buf;
    self->input_len = self->buffer.len;
    self->next_read_idx = 0;
867
    self->prefetched_idx = self->input_len;
868 869 870
    return self->input_len;
}

871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
/* Advance the underlying file past the prefetched bytes that have already
   been consumed from the input buffer, by read()ing and discarding them.
   Returns 0 on success (including when there is nothing to skip), -1 with
   an exception set if read() fails. */
static int
_Unpickler_SkipConsumed(UnpicklerObject *self)
{
    Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
    PyObject *discarded;

    if (consumed <= 0)
        return 0;

    assert(self->peek);  /* otherwise we did something wrong */
    /* This makes a useless copy... */
    discarded = PyObject_CallFunction(self->read, "n", consumed);
    if (discarded == NULL)
        return -1;
    Py_DECREF(discarded);

    self->prefetched_idx = self->next_read_idx;
    return 0;
}

889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
/* Special value for the `n` argument of _Unpickler_ReadFromFile(): read one
   whole line with readline() instead of a fixed number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;

/* If reading from a file, we need to only pull the bytes we need, since there
   may be multiple pickle objects arranged contiguously in the same input
   buffer.

   If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
   bytes from the input stream/buffer.

   Before reading, prefetched bytes that were already consumed are skipped in
   the file.  After reading, if the stream has a usable peek() method, up to
   PREFETCH additional bytes are peeked (without advancing the file) and
   appended to the data.  A peek() that raises NotImplementedError disables
   prefetching for this unpickler.

   Update the unpickler's input buffer with the newly-read data followed by
   any prefetched data. Returns -1 on failure; on success, returns the number
   of bytes read from the file (prefetched bytes are not counted), which is
   also stored in self->prefetched_idx. */
static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
{
    PyObject *data;
    Py_ssize_t total_size, read_size, prefetched_size = 0;

    assert(self->read != NULL);

    if (_Unpickler_SkipConsumed(self) < 0)
        return -1;

    if (n == READ_WHOLE_LINE)
        data = PyObject_Call(self->readline, empty_tuple, NULL);
    else {
        PyObject *len = PyLong_FromSsize_t(n);
        if (len == NULL)
            return -1;
        /* _Unpickler_FastCall() consumes the reference to `len`. */
        data = _Unpickler_FastCall(self, self->read, len);
    }
    if (data == NULL)
        return -1;

    /* Prefetch some data without advancing the file pointer, if possible */
    if (self->peek) {
        PyObject *len, *prefetched;
        len = PyLong_FromSsize_t(PREFETCH);
        if (len == NULL) {
            Py_DECREF(data);
            return -1;
        }
        prefetched = _Unpickler_FastCall(self, self->peek, len);
        if (prefetched == NULL) {
            if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
                /* peek() is probably not supported by the given file object */
                PyErr_Clear();
                Py_CLEAR(self->peek);
            }
            else {
                Py_DECREF(data);
                return -1;
            }
        }
        else {
            assert(PyBytes_Check(prefetched));
            prefetched_size = PyBytes_GET_SIZE(prefetched);
            /* Releases `prefetched`; on failure `data` is set to NULL. */
            PyBytes_ConcatAndDel(&data, prefetched);
            if (data == NULL)
                return -1;
        }
    }

    total_size = _Unpickler_SetStringInput(self, data);
    Py_DECREF(data);
    /* Report failure as exactly -1 instead of folding the error code into
       the size arithmetic below. */
    if (total_size < 0)
        return -1;

    read_size = total_size - prefetched_size;
    self->prefetched_idx = read_size;
    return read_size;
}

/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   If the request can be served from the current input buffer, `*s` points
   into it and self->next_read_idx advances by `n`.  Otherwise, when a read()
   callable is available, the buffer is refilled from the file, `*s` points at
   its start and self->next_read_idx is set to `n`.

   Returns -1 (with an exception set) on failure, including EOFError when
   fewer than `n` bytes are available. On success, returns `n`. */
static Py_ssize_t
_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
{
    Py_ssize_t num_read;

    /* Compare `n` against what is left in the buffer instead of computing
       next_read_idx + n: `n` can come straight from the pickle stream, and
       the addition could overflow for a huge value. */
    if (n <= self->input_len - self->next_read_idx) {
        *s = self->input_buffer + self->next_read_idx;
        self->next_read_idx += n;
        return n;
    }
    if (!self->read) {
        PyErr_Format(PyExc_EOFError, "Ran out of input");
        return -1;
    }
    num_read = _Unpickler_ReadFromFile(self, n);
    if (num_read < 0)
        return -1;
    if (num_read < n) {
        PyErr_Format(PyExc_EOFError, "Ran out of input");
        return -1;
    }
    *s = self->input_buffer;
    self->next_read_idx = n;
    return n;
}

/* Copy the `len` bytes at `line` into self->input_line (resized to fit),
   append a terminating NUL and store the copy's address in `*result`.

   Returns `len` on success.  On allocation failure, sets MemoryError and
   returns -1; self->input_line is left untouched in that case. */
static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
                    char **result)
{
    char *input_line = PyMem_Realloc(self->input_line, len + 1);
    if (input_line == NULL) {
        /* The allocator does not raise by itself; without this the caller
           would see -1 with no exception set. */
        PyErr_NoMemory();
        return -1;
    }

    memcpy(input_line, line, len);
    input_line[len] = '\0';
    self->input_line = input_line;
    *result = self->input_line;
    return len;
}

/* Read a line from the input stream/buffer. If we run off the end of the input
   before hitting \n, return the data we found.

   Returns the number of chars read, or -1 on failure. */
1020
static Py_ssize_t
1021
_Unpickler_Readline(UnpicklerObject *self, char **result)
1022
{
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
    Py_ssize_t i, num_read;

    for (i = self->next_read_idx; i < self->input_len; i++) {
        if (self->input_buffer[i] == '\n') {
            char *line_start = self->input_buffer + self->next_read_idx;
            num_read = i - self->next_read_idx + 1;
            self->next_read_idx = i + 1;
            return _Unpickler_CopyLine(self, line_start, num_read, result);
        }
    }
    if (self->read) {
        num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
        if (num_read < 0)
            return -1;
        *result = self->input_buffer;
1038
        self->next_read_idx = num_read;
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
        return num_read;
    }
 
    /* If we get here, we've run off the end of the input string. Return the
       remaining string and let the caller figure it out. */
    *result = self->input_buffer + self->next_read_idx;
    num_read = i - self->next_read_idx;
    self->next_read_idx = i;
    return num_read;
}
1049

1050 1051 1052 1053 1054 1055 1056
/* Grow the unpickler's memo array to `new_size` slots, which must be larger
   than the current size.  The new slots are set to NULL.

   Returns -1 (with an exception set) on failure, 0 on success. The memo array
   will be modified in place; on failure it is left as it was. */
static int
_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
{
    Py_ssize_t i;
    PyObject **memo;

    assert(new_size > self->memo_size);

    /* Reject sizes whose byte count cannot be represented before the
       multiplication below is handed to the allocator. */
    if ((size_t)new_size > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        PyErr_NoMemory();
        return -1;
    }

    memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
    if (memo == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->memo = memo;
    for (i = self->memo_size; i < new_size; i++)
        self->memo[i] = NULL;
    self->memo_size = new_size;
    return 0;
}

/* Look up slot `idx` of the unpickler's memo array.

   Returns the stored pointer without touching its reference count, or NULL
   if idx is out of bounds.  An in-range slot may itself hold NULL. */
static PyObject *
_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
{
    if (0 <= idx && idx < self->memo_size) {
        return self->memo[idx];
    }
    return NULL;
}

/* Returns -1 (with an exception set) on failure, 0 on success.
   This takes its own reference to `value`. */
1084
static int
1085
_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086
{
1087
    PyObject *old_item;
1088

1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
    if (idx >= self->memo_size) {
        if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
            return -1;
        assert(idx < self->memo_size);
    }
    Py_INCREF(value);
    old_item = self->memo[idx];
    self->memo[idx] = value;
    Py_XDECREF(old_item);
    return 0;
}

/* Allocate a memo array of `new_size` slots, all set to NULL.

   Returns the new array, or NULL with MemoryError set if the size is not
   representable or the allocation fails. */
static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)
{
    PyObject **memo;

    /* Guard the size computation; a negative or oversized count must not
       reach the allocator as a wrapped-around byte count. */
    if (new_size < 0 ||
        (size_t)new_size > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        PyErr_NoMemory();
        return NULL;
    }

    memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
    if (memo == NULL) {
        /* The allocator does not raise; make sure callers returning NULL
           upward have an exception to report. */
        PyErr_NoMemory();
        return NULL;
    }
    memset(memo, 0, new_size * sizeof(PyObject *));
    return memo;
}

/* Free the unpickler's memo, taking care to decref any items left in it.
   self->memo is set to NULL before the items are released; self->memo_size
   is not changed.  Does nothing when there is no memo. */
static void
_Unpickler_MemoCleanup(UnpicklerObject *self)
{
    PyObject **items = self->memo;
    Py_ssize_t remaining;

    if (items == NULL) {
        return;
    }

    /* Detach the array first, then release from the last slot downwards. */
    self->memo = NULL;
    for (remaining = self->memo_size; remaining > 0; remaining--) {
        Py_XDECREF(items[remaining - 1]);
    }
    PyMem_FREE(items);
}

static UnpicklerObject *
_Unpickler_New(void)
{
    UnpicklerObject *self;

    self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
    if (self == NULL)
        return NULL;

    self->stack = (Pdata *)Pdata_New();
    if (self->stack == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    memset(&self->buffer, 0, sizeof(Py_buffer));

    self->memo_size = 32;
    self->memo = _Unpickler_NewMemo(self->memo_size);
    if (self->memo == NULL) {
        Py_DECREF(self);
        return NULL;
    }

    self->arg = NULL;
    self->pers_func = NULL;
    self->input_buffer = NULL;
    self->input_line = NULL;
    self->input_len = 0;
    self->next_read_idx = 0;
1157
    self->prefetched_idx = 0;
1158 1159
    self->read = NULL;
    self->readline = NULL;
1160
    self->peek = NULL;
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
    self->encoding = NULL;
    self->errors = NULL;
    self->marks = NULL;
    self->num_marks = 0;
    self->marks_size = 0;
    self->proto = 0;
    self->fix_imports = 0;

    return self;
}

/* Bind the unpickler to the file-like object `file` by looking up its
   read() and readline() methods (both required) and its peek() method
   (optional; self->peek stays NULL when the attribute is missing).

   Returns -1 (with an exception set) on failure, 0 on success. This may
   be called once on a freshly created Unpickler. */
static int
_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
{
    self->peek = PyObject_GetAttrString(file, "peek");
    if (self->peek == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
            return -1;
        PyErr_Clear();
    }

    /* Look up readline only once read has been found: calling back into the
       interpreter with an exception pending could replace the original
       error with an unrelated one. */
    self->read = PyObject_GetAttrString(file, "read");
    self->readline = (self->read != NULL)
        ? PyObject_GetAttrString(file, "readline")
        : NULL;

    if (self->readline == NULL || self->read == NULL) {
        if (PyErr_ExceptionMatches(PyExc_AttributeError))
            PyErr_SetString(PyExc_TypeError,
                            "file must have 'read' and 'readline' attributes");
        Py_CLEAR(self->read);
        Py_CLEAR(self->readline);
        Py_CLEAR(self->peek);
        return -1;
    }
    return 0;
}

/* Store private copies of the codec name and error-handler name on the
   unpickler.  A NULL `encoding` means "ASCII"; a NULL `errors` means
   "strict".

   Returns -1 (with an exception set) on failure, 0 on success. This may
   be called once on a freshly created Unpickler. */
static int
_Unpickler_SetInputEncoding(UnpicklerObject *self,
                            const char *encoding,
                            const char *errors)
{
    const char *codec_name = (encoding != NULL) ? encoding : "ASCII";
    const char *handler_name = (errors != NULL) ? errors : "strict";

    self->encoding = strdup(codec_name);
    self->errors = strdup(handler_name);
    if (self->encoding != NULL && self->errors != NULL) {
        return 0;
    }

    PyErr_NoMemory();
    return -1;
}

/* Generate a GET opcode for an object stored in the memo.

   The memo id recorded for `key` is written as a text GET in non-binary
   mode, and as BINGET (ids below 256) or LONG_BINGET (ids up to 0xffffffff)
   in binary mode.  Returns 0 on success, -1 with an exception set on
   failure; KeyError is raised when `key` is not in the memo. */
static int
memo_get(PicklerObject *self, PyObject *key)
{
    long *slot = PyMemoTable_Get(self->memo, key);
    long memo_id;
    char opbuf[30];
    int oplen;

    if (slot == NULL) {
        PyErr_SetObject(PyExc_KeyError, key);
        return -1;
    }
    memo_id = *slot;

    if (self->bin) {
        if (memo_id < 256) {
            opbuf[0] = BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            oplen = 2;
        }
        else if (memo_id <= 0xffffffffL) {
            /* Four-byte little-endian id. */
            opbuf[0] = LONG_BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            opbuf[2] = (unsigned char)((memo_id >> 8) & 0xff);
            opbuf[3] = (unsigned char)((memo_id >> 16) & 0xff);
            opbuf[4] = (unsigned char)((memo_id >> 24) & 0xff);
            oplen = 5;
        }
        else { /* unlikely */
            PyErr_SetString(PicklingError,
                            "memo id too large for LONG_BINGET");
            return -1;
        }
    }
    else {
        opbuf[0] = GET;
        PyOS_snprintf(opbuf + 1, sizeof(opbuf) - 1, "%ld\n", memo_id);
        oplen = (int)strlen(opbuf);
    }

    return (_Pickler_Write(self, opbuf, oplen) < 0) ? -1 : 0;
}

/* Store an object in the memo, assign it a new unique ID based on the number
   of objects currently stored in the memo and generate a PUT opcode.

   Does nothing (and succeeds) when the pickler is in fast mode.  Returns 0
   on success, -1 with an exception set on failure. */
static int
memo_put(PicklerObject *self, PyObject *obj)
{
    long memo_id;
    char opbuf[30];
    int oplen;

    if (self->fast) {
        return 0;
    }

    /* The next id is simply the current number of memo entries. */
    memo_id = PyMemoTable_Size(self->memo);
    if (PyMemoTable_Set(self->memo, obj, memo_id) < 0) {
        return -1;
    }

    if (self->bin) {
        if (memo_id < 256) {
            opbuf[0] = BINPUT;
            opbuf[1] = (unsigned char)memo_id;
            oplen = 2;
        }
        else if (memo_id <= 0xffffffffL) {
            /* Four-byte little-endian id. */
            opbuf[0] = LONG_BINPUT;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            opbuf[2] = (unsigned char)((memo_id >> 8) & 0xff);
            opbuf[3] = (unsigned char)((memo_id >> 16) & 0xff);
            opbuf[4] = (unsigned char)((memo_id >> 24) & 0xff);
            oplen = 5;
        }
        else { /* unlikely */
            PyErr_SetString(PicklingError,
                            "memo id too large for LONG_BINPUT");
            return -1;
        }
    }
    else {
        opbuf[0] = PUT;
        PyOS_snprintf(opbuf + 1, sizeof(opbuf) - 1, "%ld\n", memo_id);
        oplen = (int)strlen(opbuf);
    }

    return (_Pickler_Write(self, opbuf, oplen) < 0) ? -1 : 0;
}

/* Find the name of the module that `global` lives in.

   The object's __module__ attribute is used when it exists and is not None.
   Otherwise sys.modules is searched for a module, other than __main__,
   whose attribute `global_name` is the very same object as `global`; when
   none is found, "__main__" is used.

   Returns a new reference to the module name, or NULL with an exception
   set on failure. */
static PyObject *
whichmodule(PyObject *global, PyObject *global_name)
{
    Py_ssize_t i;
    int found = 0;
    static PyObject *module_str = NULL;
    static PyObject *main_str = NULL;
    PyObject *module_name;
    PyObject *modules_dict;
    PyObject *module;
    PyObject *obj;

    /* Initialise each cached string independently, so that a failure to
       create the second one is retried on the next call instead of leaving
       it NULL for good. */
    if (module_str == NULL) {
        module_str = PyUnicode_InternFromString("__module__");
        if (module_str == NULL)
            return NULL;
    }
    if (main_str == NULL) {
        main_str = PyUnicode_InternFromString("__main__");
        if (main_str == NULL)
            return NULL;
    }

    module_name = PyObject_GetAttr(global, module_str);
    if (module_name != NULL) {
        if (module_name != Py_None)
            return module_name;
        /* In some rare cases (e.g., bound methods of extension types),
           __module__ can be None. If it is so, then search sys.modules
           for the module of global.  */
        Py_DECREF(module_name);
    }
    else if (PyErr_ExceptionMatches(PyExc_AttributeError))
        PyErr_Clear();
    else
        return NULL;

    modules_dict = PySys_GetObject("modules");
    if (modules_dict == NULL) {
        /* PySys_GetObject() reports a missing entry without raising. */
        PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
        return NULL;
    }

    i = 0;
    module_name = NULL;
    /* `module_name` and `module` are borrowed from the dict while iterating. */
    while (PyDict_Next(modules_dict, &i, &module_name, &module)) {
        int same;
        int is_main = PyObject_RichCompareBool(module_name, main_str, Py_EQ);

        if (is_main < 0)
            return NULL;        /* the comparison itself failed */
        if (is_main)
            continue;

        obj = PyObject_GetAttr(module, global_name);
        if (obj == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_AttributeError))
                return NULL;
            PyErr_Clear();
            continue;
        }

        same = (obj == global);
        Py_DECREF(obj);
        if (same) {
            found = 1;
            break;
        }
    }

    /* If no module is found, use __main__. */
    if (!found) {
        module_name = main_str;
    }

    Py_INCREF(module_name);
    return module_name;
}

/* fast_save_enter() and fast_save_leave() are guards against recursive
   objects when Pickler is used with the "fast mode" (i.e., with object
   memoization disabled). If the nesting of a list or dict object exceeds
   FAST_NESTING_LIMIT, these guards will start keeping an internal
   reference to the seen list or dict objects and check whether these objects
   are recursive. These are not strictly necessary, since save() has a
   hard-coded recursion limit, but they give a nicer error message than the
   typical RuntimeError.

   fast_save_enter() returns 1 on success.  On failure it returns 0 and sets
   self->fast_nesting to -1 to mark the error exit. */
static int
fast_save_enter(PicklerObject *self, PyObject *obj)
{
    /* if fast_nesting < 0, we're doing an error exit. */
    if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
        PyObject *key = NULL;
        if (self->fast_memo == NULL) {
            self->fast_memo = PyDict_New();
            if (self->fast_memo == NULL) {
                self->fast_nesting = -1;
                return 0;
            }
        }
        /* Objects are tracked by address. */
        key = PyLong_FromVoidPtr(obj);
        if (key == NULL) {
            /* Flag the error exit here as well, like every other failure
               path in this function. */
            self->fast_nesting = -1;
            return 0;
        }
        if (PyDict_GetItem(self->fast_memo, key)) {
            Py_DECREF(key);
            PyErr_Format(PyExc_ValueError,
                         "fast mode: can't pickle cyclic objects "
                         "including object type %.200s at %p",
                         obj->ob_type->tp_name, obj);
            self->fast_nesting = -1;
            return 0;
        }
        if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
            Py_DECREF(key);
            self->fast_nesting = -1;
            return 0;
        }
        Py_DECREF(key);
    }
    return 1;
}

/* Counterpart of fast_save_enter(): decrement the nesting level and, if the
   level on entry was at or above FAST_NESTING_LIMIT, remove `obj` (keyed by
   its address) from self->fast_memo.

   Returns 1 on success, 0 on failure. */
static int
fast_save_leave(PicklerObject *self, PyObject *obj)
{
    const int was_tracked = (self->fast_nesting >= FAST_NESTING_LIMIT);
    PyObject *key;
    int rc;

    self->fast_nesting--;
    if (!was_tracked) {
        return 1;
    }

    key = PyLong_FromVoidPtr(obj);
    if (key == NULL) {
        return 0;
    }
    rc = PyDict_DelItem(self->fast_memo, key);
    Py_DECREF(key);
    return (rc < 0) ? 0 : 1;
}

/* Write the one-byte NONE opcode.  `obj` is not examined.
   Returns 0 on success, -1 if the write fails. */
static int
save_none(PicklerObject *self, PyObject *obj)
{
    const char opcode = NONE;

    return (_Pickler_Write(self, &opcode, 1) < 0) ? -1 : 0;
}

/* Pickle a bool.  `obj` is treated as true only when it is Py_True itself.

   With protocol 2 or later a single NEWTRUE/NEWFALSE opcode is written;
   otherwise the TRUE or FALSE string (defined elsewhere in this file) is
   written without its terminating NUL.
   Returns 0 on success, -1 if the write fails. */
static int
save_bool(PicklerObject *self, PyObject *obj)
{
    const int truthy = (obj == Py_True);
    int rc;

    if (self->proto >= 2) {
        const char opcode = truthy ? NEWTRUE : NEWFALSE;
        rc = _Pickler_Write(self, &opcode, 1);
    }
    else if (truthy) {
        rc = _Pickler_Write(self, TRUE, sizeof(TRUE) - 1);
    }
    else {
        rc = _Pickler_Write(self, FALSE, sizeof(FALSE) - 1);
    }

    return (rc < 0) ? -1 : 0;
}

/* Pickle the C long `x`.

   In text mode, or when `x` does not fit the 4-byte signed BININT format,
   the value is written as a LONG opcode followed by its decimal digits,
   "L" and a newline.  Otherwise the shortest of BININT1, BININT2 and BININT
   that can hold the little-endian bytes of `x` is used.
   Returns 0 on success, -1 if the write fails. */
static int
save_int(PicklerObject *self, long x)
{
    char opbuf[32];
    int oplen;
    int as_text = !self->bin;

#if SIZEOF_LONG > 4
    /* Too big for the 4-byte signed BININT format. */
    if (x > 0x7fffffffL || x < -0x80000000L)
        as_text = 1;
#endif

    if (as_text) {
        opbuf[0] = LONG;        /* use LONG for consistency with pickle.py */
        PyOS_snprintf(opbuf + 1, sizeof(opbuf) - 1, "%ldL\n", x);
        return (_Pickler_Write(self, opbuf, strlen(opbuf)) < 0) ? -1 : 0;
    }

    /* Binary pickle and x fits in a signed 4-byte int. */
    opbuf[1] = (unsigned char)(x & 0xff);
    opbuf[2] = (unsigned char)((x >> 8) & 0xff);
    opbuf[3] = (unsigned char)((x >> 16) & 0xff);
    opbuf[4] = (unsigned char)((x >> 24) & 0xff);

    /* Pick the shortest opcode: trailing zero bytes need not be written. */
    if (opbuf[4] != 0 || opbuf[3] != 0) {
        opbuf[0] = BININT;
        oplen = 5;
    }
    else if (opbuf[2] != 0) {
        opbuf[0] = BININT2;
        oplen = 3;
    }
    else {
        opbuf[0] = BININT1;
        oplen = 2;
    }

    return (_Pickler_Write(self, opbuf, oplen) < 0) ? -1 : 0;
}

/* Pickle the Python int `obj`.

   Values that fit in a C long are delegated to save_int().  Larger values
   are written, for protocol 2 and later, as LONG1/LONG4 followed by the
   little-endian two's-complement bytes; for older protocols, as LONG
   followed by the repr, "L" and a newline.
   Returns 0 on success, -1 on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    const char long_op = LONG;
    PyObject *repr = NULL;
    Py_ssize_t size;
    int status = -1;
    long val;

    val = PyLong_AsLong(obj);
    if (!(val == -1 && PyErr_Occurred()))
        return save_int(self, val);
    /* out of range for int pickling */
    PyErr_Clear();

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        unsigned char *pdata;
        char header[5];
        size_t nbits;
        size_t nbytes;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            return (_Pickler_Write(self, header, 2) < 0) ? -1 : 0;
        }

        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto done;

        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto done;
        }

        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto done;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        if (_PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ ) < 0)
            goto done;

        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* Four-byte little-endian length. */
            Py_ssize_t count = (int)nbytes;

            header[0] = LONG4;
            header[1] = (unsigned char)(count & 0xff);
            header[2] = (unsigned char)((count >> 8) & 0xff);
            header[3] = (unsigned char)((count >> 16) & 0xff);
            header[4] = (unsigned char)((count >> 24) & 0xff);
            size = 5;
        }

        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto done;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */
        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto done;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto done;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto done;
    }

    status = 0;

  done:
    Py_XDECREF(repr);
    return status;
}

/* Pickle a Python float.

   In binary mode the value is written as BINFLOAT followed by the 8 bytes
   produced by _PyFloat_Pack8().  In text mode it is written as FLOAT
   followed by the 'g'-format text with precision 17 and a newline.
   Returns 0 on success, -1 on failure. */
static int
save_float(PicklerObject *self, PyObject *obj)
{
    double value = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
    char opcode = FLOAT;
    char *text = NULL;
    int status = -1;

    if (self->bin) {
        char packed[9];

        packed[0] = BINFLOAT;
        if (_PyFloat_Pack8(value, (unsigned char *)&packed[1], 0) < 0)
            return -1;
        return (_Pickler_Write(self, packed, 9) < 0) ? -1 : 0;
    }

    if (_Pickler_Write(self, &opcode, 1) >= 0) {
        text = PyOS_double_to_string(value, 'g', 17, 0, NULL);
        if (text == NULL) {
            PyErr_NoMemory();
        }
        else if (_Pickler_Write(self, text, strlen(text)) >= 0 &&
                 _Pickler_Write(self, "\n", 1) >= 0) {
            status = 0;
        }
    }

    /* `text` is still NULL when nothing was formatted. */
    PyMem_Free(text);
    return status;
}

/* Pickle a bytes object.

   For protocols below 3 the object is saved through save_reduce() as a call
   of the bytes type on a list of its items.  For protocol 3 and later it is
   written as SHORT_BINBYTES (sizes below 256) or BINBYTES (sizes up to
   0xffffffff) followed by the raw data, and then memoized.

   Returns 0 on success, -1 with an exception set on failure. */
static int
save_bytes(PicklerObject *self, PyObject *obj)
{
    if (self->proto < 3) {
        /* Older pickle protocols do not have an opcode for pickling bytes
           objects. Therefore, we need to fake the copy protocol (i.e.,
           the __reduce__ method) to permit bytes object unpickling. */
        PyObject *reduce_value = NULL;
        PyObject *bytelist = NULL;
        int status;

        bytelist = PySequence_List(obj);
        if (bytelist == NULL)
            return -1;

        reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
                                     bytelist);
        if (reduce_value == NULL) {
            Py_DECREF(bytelist);
            return -1;
        }

        /* save_reduce() will memoize the object automatically. */
        status = save_reduce(self, reduce_value, obj);
        Py_DECREF(reduce_value);
        Py_DECREF(bytelist);
        return status;
    }
    else {
        Py_ssize_t size;
        char header[5];
        int len;

        size = PyBytes_Size(obj);
        if (size < 0)
            return -1;

        if (size < 256) {
            header[0] = SHORT_BINBYTES;
            header[1] = (unsigned char)size;
            len = 2;
        }
        else if (size <= 0xffffffffL) {
            /* Four-byte little-endian length. */
            header[0] = BINBYTES;
            header[1] = (unsigned char)(size & 0xff);
            header[2] = (unsigned char)((size >> 8) & 0xff);
            header[3] = (unsigned char)((size >> 16) & 0xff);
            header[4] = (unsigned char)((size >> 24) & 0xff);
            len = 5;
        }
        else {
            /* string too large: a -1 return must come with an exception. */
            PyErr_SetString(PyExc_OverflowError,
                            "cannot serialize a bytes object larger than 4GB");
            return -1;
        }

        if (_Pickler_Write(self, header, len) < 0)
            return -1;

        if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
            return -1;

        if (memo_put(self, obj) < 0)
            return -1;

        return 0;
    }
}

/* Write the `ndigits` lowest hex digits of `value` (lowercase, most
   significant first) at `out`; return the position just past them. */
static char *
raw_unicode_escape_hex(char *out, unsigned long value, int ndigits)
{
    static const char *hexdigits = "0123456789abcdef";
    int shift;

    for (shift = 4 * (ndigits - 1); shift >= 0; shift -= 4) {
        *out++ = hexdigits[(value >> shift) & 0xf];
    }
    return out;
}

/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
   backslash and newline characters to \uXXXX escapes.

   Takes `size` code units starting at `s` and returns a new bytes object,
   or NULL with an exception set on failure. */
static PyObject *
raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
{
    PyObject *scratch, *result;
    const Py_UNICODE *end = s + size;
    char *start;
    char *out;

    /* Worst-case number of output bytes per input code unit. */
#ifdef Py_UNICODE_WIDE
    const Py_ssize_t expandsize = 10;
#else
    const Py_ssize_t expandsize = 6;
#endif

    if (size > PY_SSIZE_T_MAX / expandsize)
        return PyErr_NoMemory();

    scratch = PyByteArray_FromStringAndSize(NULL, expandsize * size);
    if (scratch == NULL)
        return NULL;

    /* With size == 0 the loop body never runs and an empty result is
       produced. */
    out = start = PyByteArray_AS_STRING(scratch);
    while (s < end) {
        Py_UNICODE ch = *s++;

#ifdef Py_UNICODE_WIDE
        /* Map 32-bit characters to '\Uxxxxxxxx' */
        if (ch >= 0x10000) {
            *out++ = '\\';
            *out++ = 'U';
            out = raw_unicode_escape_hex(out, (unsigned long)ch, 8);
            continue;
        }
#else
        /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
        if (ch >= 0xD800 && ch < 0xDC00) {
            /* Look at the following unit; it is consumed only when it
               completes the pair. */
            Py_UNICODE ch2 = *s;

            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
                Py_UCS4 ucs =
                    (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;

                s++;
                *out++ = '\\';
                *out++ = 'U';
                out = raw_unicode_escape_hex(out, (unsigned long)ucs, 8);
                continue;
            }
            /* Fall through: isolated surrogates are copied as-is */
        }
#endif

        /* Map 16-bit characters to '\uxxxx' */
        if (ch >= 256 || ch == '\\' || ch == '\n') {
            *out++ = '\\';
            *out++ = 'u';
            out = raw_unicode_escape_hex(out, (unsigned long)ch, 4);
        }
        /* Copy everything else as-is */
        else {
            *out++ = (char) ch;
        }
    }

    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(scratch),
                                       out - start);
    Py_DECREF(scratch);
    return result;
}

/* Pickle the str object obj.
 *
 * When self->bin is set, writes BINUNICODE, a 4-byte little-endian length,
 * and the UTF-8 encoding of obj (encoded with the "surrogatepass" error
 * handler).  Otherwise writes UNICODE, the raw-unicode-escape form of obj,
 * and a terminating newline.  In both cases obj is then put in the memo.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
save_unicode(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t size;
    PyObject *encoded = NULL;

    if (self->bin) {
        char pdata[5];

        encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
                                    PyUnicode_GET_SIZE(obj),
                                    "surrogatepass");
        if (encoded == NULL)
            goto error;

        size = PyBytes_GET_SIZE(encoded);
        if (size < 0 || size > 0xffffffffL) {
            /* The BINUNICODE length field is only 4 bytes wide.  Report the
               problem explicitly: returning -1 with no exception set would
               surface as an unrelated SystemError in the caller. */
            PyErr_SetString(PyExc_OverflowError,
                            "cannot serialize a string larger than 4GiB");
            goto error;
        }

        /* Opcode followed by the length, least significant byte first. */
        pdata[0] = BINUNICODE;
        pdata[1] = (unsigned char)(size & 0xff);
        pdata[2] = (unsigned char)((size >> 8) & 0xff);
        pdata[3] = (unsigned char)((size >> 16) & 0xff);
        pdata[4] = (unsigned char)((size >> 24) & 0xff);

        if (_Pickler_Write(self, pdata, 5) < 0)
            goto error;

        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
            goto error;
    }
    else {
        const char unicode_op = UNICODE;

        encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
                                     PyUnicode_GET_SIZE(obj));
        if (encoded == NULL)
            goto error;

        if (_Pickler_Write(self, &unicode_op, 1) < 0)
            goto error;

        size = PyBytes_GET_SIZE(encoded);
        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
            goto error;

        /* The text form is newline-terminated. */
        if (_Pickler_Write(self, "\n", 1) < 0)
            goto error;
    }
    if (memo_put(self, obj) < 0)
        goto error;

    Py_DECREF(encoded);
    return 0;

  error:
    Py_XDECREF(encoded);
    return -1;
}

/* Helper for save_tuple: pickle, in order, each of the len items of the
 * tuple t.  Returns 0 once every item has been saved, or -1 as soon as an
 * item slot is NULL or save() reports a failure.
 */
static int
store_tuple_elements(PicklerObject *self, PyObject *t, int len)
{
    int pos = 0;

    assert(PyTuple_Size(t) == len);

    while (pos < len) {
        PyObject *member = PyTuple_GET_ITEM(t, pos);

        if (member == NULL || save(self, member, 0) < 0)
            return -1;
        pos++;
    }

    return 0;
}

/* Pickle the tuple obj.
 *
 * Three encodings are produced, depending on protocol and length:
 *   - empty tuple: EMPTY_TUPLE when self->proto is non-zero, else MARK TUPLE;
 *   - 1 to 3 items with protocol >= 2: the items followed by TUPLE1/2/3;
 *   - everything else: MARK, the items, TUPLE.
 *
 * A tuple can be reachable from itself.  If, after its items have been
 * saved, the tuple turns up in the memo, the items just written are popped
 * again and the tuple is fetched from the memo instead.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    const char op_mark = MARK;
    const char op_tuple = TUPLE;
    const char op_pop = POP;
    const char op_pop_mark = POP_MARK;
    /* Indexed by tuple length (0..3). */
    const char short_ops[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
    int len, k;

    len = PyTuple_Size(obj);
    if (len < 0)
        return -1;

    if (len == 0) {
        char empty[2];
        int nbytes;

        if (self->proto) {
            empty[0] = EMPTY_TUPLE;
            nbytes = 1;
        }
        else {
            empty[0] = MARK;
            empty[1] = TUPLE;
            nbytes = 2;
        }
        /* Note: the empty tuple is not memoized. */
        return _Pickler_Write(self, empty, nbytes) < 0 ? -1 : 0;
    }

    /* The tuple isn't in the memo at this point.  Finding it there once the
     * items are saved means one of the items refers back to the tuple.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Short form: items followed by TUPLE{1,2,3}, no MARK. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (!PyMemoTable_Get(self->memo, obj)) {
            /* Not recursive. */
            if (_Pickler_Write(self, short_ops + len, 1) < 0)
                return -1;
            goto memoize;
        }

        /* Recursive: discard the len items, then fetch from the memo. */
        for (k = 0; k < len; k++) {
            if (_Pickler_Write(self, &op_pop, 1) < 0)
                return -1;
        }
        return memo_get(self, obj) < 0 ? -1 : 0;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &op_mark, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* Recursive: undo what was pushed. */
        if (self->bin) {
            if (_Pickler_Write(self, &op_pop_mark, 1) < 0)
                return -1;
        }
        else {
            /* One POP per item, plus one more for the MARK itself. */
            for (k = 0; k <= len; k++) {
                if (_Pickler_Write(self, &op_pop, 1) < 0)
                    return -1;
            }
        }
        return memo_get(self, obj) < 0 ? -1 : 0;
    }

    /* Not recursive. */
    if (_Pickler_Write(self, &op_tuple, 1) < 0)
        return -1;

  memoize:
    return memo_put(self, obj) < 0 ? -1 : 0;
}

/* Pickle the items produced by the iterator iter as list appends.
 *
 * Protocol 0 has no APPENDS, so every item is followed by its own APPEND.
 * Later protocols emit chunks of
 *     MARK item item ... item APPENDS
 * holding at most BATCHSIZE items each; a chunk that would contain a single
 * item is written as "item APPEND" instead.  The caller must already have
 * emitted the empty list (or list-like object) the opcodes operate on.
 *
 * Returns 0 on success, <0 on error.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    const char op_mark = MARK;
    const char op_append = APPEND;
    const char op_appends = APPENDS;
    PyObject *head = NULL;      /* first item of the current batch */
    PyObject *next = NULL;      /* item currently being fetched/saved */
    int rc, n;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        while ((next = PyIter_Next(iter)) != NULL) {
            rc = save(self, next, 0);
            Py_DECREF(next);
            if (rc < 0)
                return -1;
            if (_Pickler_Write(self, &op_append, 1) < 0)
                return -1;
        }
        /* NULL means either exhaustion or an error raised by the iterator. */
        return PyErr_Occurred() ? -1 : 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    for (;;) {
        /* Look ahead up to two items to pick between APPEND and APPENDS. */
        head = PyIter_Next(iter);
        if (head == NULL) {
            if (PyErr_Occurred())
                goto error;
            /* Iterator exhausted exactly on a batch boundary. */
            return 0;
        }

        next = PyIter_Next(iter);
        if (next == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* A lone item: no MARK/APPENDS wrapper needed. */
            if (save(self, head, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &op_append, 1) < 0)
                goto error;
            Py_CLEAR(head);
            return 0;
        }

        /* Two or more items: MARK, items, APPENDS. */
        if (_Pickler_Write(self, &op_mark, 1) < 0)
            goto error;

        if (save(self, head, 0) < 0)
            goto error;
        Py_CLEAR(head);

        /* Save the look-ahead item and keep pulling until the batch is
           full or the iterator runs dry. */
        for (n = 1; next != NULL; ) {
            if (save(self, next, 0) < 0)
                goto error;
            Py_CLEAR(next);

            if (++n == BATCHSIZE)
                break;

            next = PyIter_Next(iter);
            if (next == NULL && PyErr_Occurred())
                goto error;
        }

        if (_Pickler_Write(self, &op_appends, 1) < 0)
            goto error;

        /* A short batch means the iterator is exhausted. */
        if (n != BATCHSIZE)
            return 0;
    }

  error:
    Py_XDECREF(head);
    Py_XDECREF(next);
    return -1;
}

2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207
/* This is a variant of batch_list() above, specialized for lists (with no
 * support for list subclasses). Like batch_list(), we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, -1 on error.
 *
 * This version is considerably faster than batch_list(), if less general.
 *
 * Note that this only works for protocols > 0.
 *
 * Items are fetched with PyList_GET_ITEM, which yields borrowed references.
 * save() is handed arbitrary objects, so a strong reference is held around
 * each call in case pickling the item ends up mutating the list.
 */
static int
batch_list_exact(PicklerObject *self, PyObject *obj)
{
    PyObject *item = NULL;
    int this_batch, status;
    Py_ssize_t total;       /* index of the next item; matches list size type */

    const char append_op = APPEND;
    const char appends_op = APPENDS;
    const char mark_op = MARK;

    assert(obj != NULL);
    assert(self->proto > 0);
    assert(PyList_CheckExact(obj));

    /* A single item is written as "item APPEND", without MARK/APPENDS. */
    if (PyList_GET_SIZE(obj) == 1) {
        item = PyList_GET_ITEM(obj, 0);
        Py_INCREF(item);
        status = save(self, item, 0);
        Py_DECREF(item);
        if (status < 0)
            return -1;
        if (_Pickler_Write(self, &append_op, 1) < 0)
            return -1;
        return 0;
    }

    /* Write in batches of BATCHSIZE. */
    total = 0;
    do {
        this_batch = 0;
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            return -1;
        /* The size is re-read on every step, so the index stays in bounds
           even if the list shrinks while an item is being saved. */
        while (total < PyList_GET_SIZE(obj)) {
            item = PyList_GET_ITEM(obj, total);
            Py_INCREF(item);
            status = save(self, item, 0);
            Py_DECREF(item);
            if (status < 0)
                return -1;
            total++;
            if (++this_batch == BATCHSIZE)
                break;
        }
        if (_Pickler_Write(self, &appends_op, 1) < 0)
            return -1;

    } while (total < PyList_GET_SIZE(obj));

    return 0;
}

2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228
/* Pickle the list (or list subclass instance) obj.
 *
 * Writes an empty-list header (EMPTY_LIST when self->bin is set, otherwise
 * MARK LIST), memoizes obj, and then appends the items: exact lists under
 * protocol > 0 go through batch_list_exact(), anything else is iterated and
 * handed to batch_list().
 *
 * Returns the status of the batch helper (0 on success), or -1 on error.
 */
static int
save_list(PicklerObject *self, PyObject *obj)
{
    char header[3];
    int len;
    int status = -1;        /* pessimistic default; cleared on success */

    if (self->fast && !fast_save_enter(self, obj))
        goto finish;

    /* Create an empty list. */
    if (self->bin) {
        header[0] = EMPTY_LIST;
        len = 1;
    }
    else {
        header[0] = MARK;
        header[1] = LIST;
        len = 2;
    }
    if (_Pickler_Write(self, header, len) < 0)
        goto finish;

    /* From here on, len is the number of items in the list. */
    len = PyList_Size(obj);
    if (len < 0)
        goto finish;

    if (memo_put(self, obj) < 0)
        goto finish;

    if (len == 0) {
        /* Nothing to append. */
        status = 0;
    }
    else if (PyList_CheckExact(obj) && self->proto > 0) {
        /* Fast path for genuine lists. */
        if (Py_EnterRecursiveCall(" while pickling an object"))
            goto finish;
        status = batch_list_exact(self, obj);
        Py_LeaveRecursiveCall();
    }
    else {
        PyObject *iter = PyObject_GetIter(obj);

        if (iter == NULL)
            goto finish;
        if (Py_EnterRecursiveCall(" while pickling an object")) {
            Py_DECREF(iter);
            goto finish;
        }
        status = batch_list(self, iter);
        Py_LeaveRecursiveCall();
        Py_DECREF(iter);
    }

  finish:
    /* Reached on every path, including a failed fast_save_enter(). */
    if (self->fast && !fast_save_leave(self, obj))
        status = -1;

    return status;
}

/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
 *     MARK key value ... key value SETITEMS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty dict, or dict-like object, for the SETITEMS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * Every item produced by iter must be a 2-tuple; anything else raises
 * TypeError.  With protocol 0, SETITEMS is unavailable and each pair is
 * followed by its own SETITEM.  With later protocols a batch holds at most
 * BATCHSIZE pairs, and a batch of exactly one pair is written as
 * "key value SETITEM".
 *
 * This is very much like batch_list().  The difference between saving
 * elements directly, and picking apart two-tuples, is so long-winded at
 * the C level, though, that attempts to combine these routines were too
 * ugly to bear.
 */
static int
batch_dict(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char setitem_op = SETITEM;
    const char setitems_op = SETITEMS;

    assert(iter != NULL);

    if (self->proto == 0) {
        /* SETITEMS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
                PyErr_SetString(PyExc_TypeError, "dict items "
                                "iterator must return 2-tuples");
                /* obj is a new reference from PyIter_Next(); release it
                   before bailing out so it is not leaked. */
                Py_DECREF(obj);
                return -1;
            }
            i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
            if (i >= 0)
                i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &setitem_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }
        if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
            PyErr_SetString(PyExc_TypeError, "dict items "
                                "iterator must return 2-tuples");
            goto error;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
                goto error;
            if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
                goto error;
            if (_Pickler_Write(self, &setitem_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, SETITEMS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
            goto error;
        if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
                PyErr_SetString(PyExc_TypeError, "dict items "
                    "iterator must return 2-tuples");
                goto error;
            }
            if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
                save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &setitems_op, 1) < 0)
            goto error;

    } while (n == BATCHSIZE);
    return 0;

  error:
    /* Both pointers are either NULL or owned references at this point. */
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}

2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419
/* This is a variant of batch_dict() above that specializes for dicts, with no
 * support for dict subclasses. Like batch_dict(), we batch up chunks of
 *     MARK key value ... key value SETITEMS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty dict, or dict-like object, for the SETITEMS to operate on.
 * Returns 0 on success, -1 on error.
 *
 * Note that this currently doesn't work for protocol 0.
 *
 * PyDict_Next() yields borrowed references.  save() is handed arbitrary
 * objects, so strong references to the key and value are held around the
 * calls in case pickling them ends up modifying the dict.  A change in the
 * dict's size is detected after each batch and reported as RuntimeError.
 */
static int
batch_dict_exact(PicklerObject *self, PyObject *obj)
{
    PyObject *key = NULL, *value = NULL;
    int i, status;
    Py_ssize_t dict_size, ppos = 0;

    const char mark_op = MARK;
    const char setitem_op = SETITEM;
    const char setitems_op = SETITEMS;

    assert(obj != NULL);
    assert(self->proto > 0);

    dict_size = PyDict_Size(obj);

    /* Special-case len(d) == 1 to save space. */
    if (dict_size == 1) {
        PyDict_Next(obj, &ppos, &key, &value);
        Py_INCREF(key);
        Py_INCREF(value);
        status = save(self, key, 0);
        if (status >= 0)
            status = save(self, value, 0);
        Py_DECREF(key);
        Py_DECREF(value);
        if (status < 0)
            return -1;
        if (_Pickler_Write(self, &setitem_op, 1) < 0)
            return -1;
        return 0;
    }

    /* Write in batches of BATCHSIZE. */
    do {
        i = 0;
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            return -1;
        while (PyDict_Next(obj, &ppos, &key, &value)) {
            Py_INCREF(key);
            Py_INCREF(value);
            status = save(self, key, 0);
            if (status >= 0)
                status = save(self, value, 0);
            Py_DECREF(key);
            Py_DECREF(value);
            if (status < 0)
                return -1;
            if (++i == BATCHSIZE)
                break;
        }
        if (_Pickler_Write(self, &setitems_op, 1) < 0)
            return -1;
        if (PyDict_Size(obj) != dict_size) {
            PyErr_Format(
                PyExc_RuntimeError,
                "dictionary changed size during iteration");
            return -1;
        }

    } while (i == BATCHSIZE);
    return 0;
}

2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488
/* Pickle the dict (or dict subclass instance) obj.
 *
 * Writes an empty-dict header (EMPTY_DICT when self->bin is set, otherwise
 * MARK DICT), memoizes obj, and then stores the items: exact dicts under
 * protocol > 0 go through batch_dict_exact(); anything else has its items()
 * method called and the result iterated by batch_dict().
 *
 * Returns the status of the batch helper (0 on success), or -1 on error.
 */
static int
save_dict(PicklerObject *self, PyObject *obj)
{
    char header[3];
    int len;
    int status = -1;        /* pessimistic default; cleared on success */

    if (self->fast && !fast_save_enter(self, obj))
        goto finish;

    /* Create an empty dict. */
    if (self->bin) {
        header[0] = EMPTY_DICT;
        len = 1;
    }
    else {
        header[0] = MARK;
        header[1] = DICT;
        len = 2;
    }
    if (_Pickler_Write(self, header, len) < 0)
        goto finish;

    /* From here on, len is the number of entries in the dict. */
    len = PyDict_Size(obj);
    if (len < 0)
        goto finish;

    if (memo_put(self, obj) < 0)
        goto finish;

    if (len == 0) {
        /* Nothing to store. */
        status = 0;
    }
    else if (PyDict_CheckExact(obj) && self->proto > 0) {
        /* We can take certain shortcuts if we know this is a dict and
           not a dict subclass. */
        if (Py_EnterRecursiveCall(" while pickling an object"))
            goto finish;
        status = batch_dict_exact(self, obj);
        Py_LeaveRecursiveCall();
    }
    else {
        PyObject *pairs;
        PyObject *pair_iter;

        pairs = PyObject_CallMethod(obj, "items", "()");
        if (pairs == NULL)
            goto finish;
        pair_iter = PyObject_GetIter(pairs);
        Py_DECREF(pairs);
        if (pair_iter == NULL)
            goto finish;

        if (Py_EnterRecursiveCall(" while pickling an object")) {
            Py_DECREF(pair_iter);
            goto finish;
        }
        status = batch_dict(self, pair_iter);
        Py_LeaveRecursiveCall();
        Py_DECREF(pair_iter);
    }

  finish:
    /* Reached on every path, including a failed fast_save_enter(). */
    if (self->fast && !fast_save_leave(self, obj))
        status = -1;

    return status;
}

/* Pickle obj by reference, as "module name + global name".
 *
 * The global name is `name` when it is non-NULL, otherwise obj.__name__.
 * The module is located with whichmodule(), imported, and the attribute is
 * looked up again to verify that it really is obj; any mismatch raises
 * PicklingError.
 *
 * With protocol >= 2, an object found in the extension registry is written
 * as an EXT1/EXT2/EXT4 opcode.  Otherwise a GLOBAL opcode is written,
 * followed by the newline-terminated module and global names, and obj is
 * memoized.  When self->fix_imports is set, the names are first translated
 * through name_mapping_3to2 and import_mapping_3to2.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PicklerObject *self, PyObject *obj, PyObject *name)
{
    static PyObject *name_str = NULL;
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *cls;
    int status = 0;

    const char global_op = GLOBAL;

    if (name_str == NULL) {
        name_str = PyUnicode_InternFromString("__name__");
        if (name_str == NULL)
            goto error;
    }

    if (name) {
        global_name = name;
        Py_INCREF(global_name);
    }
    else {
        global_name = PyObject_GetAttr(obj, name_str);
        if (global_name == NULL)
            goto error;
    }

    module_name = whichmodule(obj, global_name);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    cls = PyObject_GetAttr(module, global_name);
    if (cls == NULL) {
        PyErr_Format(PicklingError,
                     "Can't pickle %R: attribute lookup %S.%S failed",
                     obj, module_name, global_name);
        goto error;
    }
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *code_obj;      /* extension code as Python object */
        long code;               /* extension code as C value */
        char pdata[5];
        int n;

        /* NOTE(review): two_tuple is a shared scratch tuple defined elsewhere
           in this file; the names are stored in it as borrowed references,
           used only as a lookup key, and never cleared -- confirm that its
           setup code makes this safe. */
        PyTuple_SET_ITEM(two_tuple, 0, module_name);
        PyTuple_SET_ITEM(two_tuple, 1, global_name);
        code_obj = PyDict_GetItem(extension_registry, two_tuple);
        /* The object is not registered in the extension registry.
           This is the most likely code path. */
        if (code_obj == NULL)
            goto gen_global;

        /* XXX: pickle.py checks neither the type nor the range
           of the value returned by the extension_registry. It should for
           consistency. */

        /* Verify code_obj has the right type and value. */
        if (!PyLong_Check(code_obj)) {
            PyErr_Format(PicklingError,
                         "Can't pickle %R: extension code %R isn't an integer",
                         obj, code_obj);
            goto error;
        }
        code = PyLong_AS_LONG(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            PyErr_Format(PicklingError,
                         "Can't pickle %R: extension code %ld is out of range",
                         obj, code);
            goto error;
        }

        /* Generate an EXT opcode: the narrowest one that fits the code,
           with the code stored least significant byte first. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
        /* Generate a normal global opcode if we are using a pickle
           protocol < 2, or if the object is not registered in the
           extension registry. */
        PyObject *encoded;
        PyObject *(*unicode_encoder)(PyObject *);

  gen_global:
        if (_Pickler_Write(self, &global_op, 1) < 0)
            goto error;

        /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
           the module name and the global name using UTF-8. We do so only when
           we are using pickle protocol version 3 or newer. This is to
           ensure compatibility with older Unpickler running on Python 2.x. */
        if (self->proto >= 3) {
            unicode_encoder = PyUnicode_AsUTF8String;
        }
        else {
            unicode_encoder = PyUnicode_AsASCIIString;
        }

        /* For protocol < 3 and if the user didn't request against doing so,
           we convert module names to the old 2.x module names. */
        if (self->fix_imports) {
            PyObject *key;
            PyObject *item;

            key = PyTuple_Pack(2, module_name, global_name);
            if (key == NULL)
                goto error;
            item = PyDict_GetItemWithError(name_mapping_3to2, key);
            Py_DECREF(key);
            if (item) {
                PyObject *fixed_module_name;
                PyObject *fixed_global_name;

                if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
                    PyErr_Format(PyExc_RuntimeError,
                                 "_compat_pickle.REVERSE_NAME_MAPPING values "
                                 "should be 2-tuples, not %.200s",
                                 Py_TYPE(item)->tp_name);
                    goto error;
                }
                /* Validate the replacements while they are still borrowed
                   and while module_name/global_name still hold the
                   references this function owns.  Overwriting those first
                   would make the error path below release references it
                   never acquired. */
                fixed_module_name = PyTuple_GET_ITEM(item, 0);
                fixed_global_name = PyTuple_GET_ITEM(item, 1);
                if (!PyUnicode_Check(fixed_module_name) ||
                    !PyUnicode_Check(fixed_global_name)) {
                    PyErr_Format(PyExc_RuntimeError,
                                 "_compat_pickle.REVERSE_NAME_MAPPING values "
                                 "should be pairs of str, not (%.200s, %.200s)",
                                 Py_TYPE(fixed_module_name)->tp_name,
                                 Py_TYPE(fixed_global_name)->tp_name);
                    goto error;
                }
                Py_INCREF(fixed_module_name);
                Py_INCREF(fixed_global_name);
                Py_CLEAR(module_name);
                Py_CLEAR(global_name);
                module_name = fixed_module_name;
                global_name = fixed_global_name;
            }
            else if (PyErr_Occurred()) {
                goto error;
            }

            item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
            if (item) {
                if (!PyUnicode_Check(item)) {
                    PyErr_Format(PyExc_RuntimeError,
                                 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
                                 "should be strings, not %.200s",
                                 Py_TYPE(item)->tp_name);
                    goto error;
                }
                /* Take our own reference before dropping the old name. */
                Py_INCREF(item);
                Py_CLEAR(module_name);
                module_name = item;
            }
            else if (PyErr_Occurred()) {
                goto error;
            }
        }

        /* Save the name of the module. */
        encoded = unicode_encoder(module_name);
        if (encoded == NULL) {
            if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                PyErr_Format(PicklingError,
                             "can't pickle module identifier '%S' using "
                             "pickle protocol %i", module_name, self->proto);
            goto error;
        }
        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                          PyBytes_GET_SIZE(encoded)) < 0) {
            Py_DECREF(encoded);
            goto error;
        }
        Py_DECREF(encoded);
        if(_Pickler_Write(self, "\n", 1) < 0)
            goto error;

        /* Save the name of the global. */
        encoded = unicode_encoder(global_name);
        if (encoded == NULL) {
            if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                PyErr_Format(PicklingError,
                             "can't pickle global identifier '%S' using "
                             "pickle protocol %i", global_name, self->proto);
            goto error;
        }
        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                          PyBytes_GET_SIZE(encoded)) < 0) {
            Py_DECREF(encoded);
            goto error;
        }
        Py_DECREF(encoded);
        if(_Pickler_Write(self, "\n", 1) < 0)
            goto error;

        /* Memoize the object. */
        if (memo_put(self, obj) < 0)
            goto error;
    }

    /* Success falls straight through to the cleanup; failures enter it via
       the label and only differ in the status they return. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);

    return status;
}

/* Ask the persistent_id callback `func` for a persistent id for `obj` and,
 * if it supplies one, write it to the pickle stream.
 *
 * In binary mode the id is pickled with save() followed by BINPERSID; in
 * text mode str(pid) is written between a PERSID opcode and a newline.
 *
 * Returns:
 *   -1  on error;
 *    0  if the callback returned None (nothing was written);
 *    1  if a persistent id was written.
 */
static int
save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
{
    PyObject *pid = NULL;
    int status = 0;

    const char persid_op = PERSID;
    const char binpersid_op = BINPERSID;

    /* NOTE(review): this extra reference is presumably consumed by
       _Pickler_FastCall(); confirm against its definition. */
    Py_INCREF(obj);
    pid = _Pickler_FastCall(self, func, obj);
    if (pid == NULL)
        return -1;

    if (pid != Py_None) {
        if (self->bin) {
            /* pers_save=1 keeps save() from calling save_pers() again on
               the persistent id itself. */
            if (save(self, pid, 1) < 0 ||
                _Pickler_Write(self, &binpersid_op, 1) < 0)
                goto error;
        }
        else {
            PyObject *pid_str = NULL;
            char *pid_ascii_bytes;
            Py_ssize_t size;
            int failed;

            pid_str = PyObject_Str(pid);
            if (pid_str == NULL)
                goto error;

            /* XXX: Should it check whether the persistent id only contains
               ASCII characters? And what if the pid contains embedded
               newlines? */
            pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
            if (pid_ascii_bytes == NULL) {
                Py_DECREF(pid_str);
                goto error;
            }

            /* The encoded buffer belongs to pid_str, so pid_str must stay
               alive until the bytes have been written out.  Releasing it
               before the writes would leave pid_ascii_bytes dangling. */
            failed = (_Pickler_Write(self, &persid_op, 1) < 0 ||
                      _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
                      _Pickler_Write(self, "\n", 1) < 0);
            Py_DECREF(pid_str);
            if (failed)
                goto error;
        }
        status = 1;
    }

    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(pid);

    return status;
}

/* We're saving obj, and args is the 2-thru-5 tuple returned by the
 * appropriate __reduce__ method for obj.
 *
 * The tuple is (callable, argtup[, state[, listitems[, dictitems]]]).
 * callable must be callable and argtup a tuple; state, listitems and
 * dictitems may be None, and the latter two must otherwise be iterators.
 *
 * With protocol >= 2 and a callable whose __name__ is "__newobj__", the
 * class and its remaining arguments are saved followed by NEWOBJ;
 * otherwise callable and argtup are saved followed by REDUCE.  The object
 * is then memoized (when obj is non-NULL), list/dict items are batched and
 * the state, if any, is saved followed by BUILD.
 *
 * Returns 0 on success and -1 on error.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    Py_ssize_t size;

    int use_newobj = self->proto >= 2;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;

    size = PyTuple_Size(args);
    if (size < 2 || size > 5) {
        PyErr_SetString(PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 5 elements");
        return -1;
    }

    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
                           &callable, &argtup, &state, &listitems, &dictitems))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on NULL means "absent" for the three optional items. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(PicklingError, "Fourth element of tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(PicklingError, "Fifth element of tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    /* Protocol 2 special case: if callable's name is __newobj__, use
       NEWOBJ. */
    if (use_newobj) {
        static PyObject *newobj_str = NULL;
        PyObject *name_str;

        if (newobj_str == NULL) {
            newobj_str = PyUnicode_InternFromString("__newobj__");
            if (newobj_str == NULL)
                return -1;
        }

        name_str = PyObject_GetAttrString(callable, "__name__");
        if (name_str == NULL) {
            /* A callable without __name__ simply is not __newobj__. */
            if (PyErr_ExceptionMatches(PyExc_AttributeError))
                PyErr_Clear();
            else
                return -1;
            use_newobj = 0;
        }
        else {
            use_newobj = PyUnicode_Check(name_str) &&
                PyUnicode_Compare(name_str, newobj_str) == 0;
            Py_DECREF(name_str);
        }
    }
    if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (Py_SIZE(argtup) < 1) {
            PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyObject_HasAttrString(cls, "__new__")) {
            PyErr_SetString(PicklingError, "args[0] from "
                            "__newobj__ args has no __new__");
            return -1;
        }

        if (obj != NULL) {
            obj_class = PyObject_GetAttrString(obj, "__class__");
            if (obj_class == NULL) {
                if (PyErr_ExceptionMatches(PyExc_AttributeError))
                    PyErr_Clear();
                else
                    return -1;
            }
            p = obj_class != cls;    /* true iff a problem */
            /* obj_class is NULL when the AttributeError was cleared above,
               so the NULL-tolerant decref is required here. */
            Py_XDECREF(obj_class);
            if (p) {
                PyErr_SetString(PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls save() are prone to infinite recursion. Imagine
           what happen if the value returned by the __reduce__() method of
           some extension type contains another object of the same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RuntimeError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargstup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But,
           the point, here, is it is quite easy to end up with a broken reduce
           function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller do not want to memoize the object. Not particularly useful,
       but that is to mimic the behavior save_reduce() in pickle.py when
       obj is None. */
    if (obj && memo_put(self, obj) < 0)
        return -1;

    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (save(self, state, 0) < 0 ||
            _Pickler_Write(self, &build_op, 1) < 0)
            return -1;
    }

    return 0;
}

/* Pickle a single object, dispatching on its exact type.
 *
 * Order of attempts:
 *   1. the persistent_id hook (skipped when pers_save is non-zero);
 *   2. atoms that are never memoized (None, bool, int, float);
 *   3. a memo lookup, emitting a reference if obj was already pickled;
 *   4. the built-in containers, strings, types and functions;
 *   5. a reduction obtained from dispatch_table, __reduce_ex__ or
 *      __reduce__, whose result is handed to save_global() (string) or
 *      save_reduce() (tuple).
 *
 * The whole call is bracketed by Py_EnterRecursiveCall() and
 * Py_LeaveRecursiveCall().
 *
 * Returns a negative value on error.  On success the value is 0, or the
 * positive status of save_pers() when a persistent id was written.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (Py_EnterRecursiveCall(" while pickling an object"))
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj, self->pers_func)) != 0)
            goto done;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        status = save_none(self, obj);
        goto done;
    }
    else if (obj == Py_False || obj == Py_True) {
        status = save_bool(self, obj);
        goto done;
    }
    else if (type == &PyLong_Type) {
        status = save_long(self, obj);
        goto done;
    }
    else if (type == &PyFloat_Type) {
        status = save_float(self, obj);
        goto done;
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        if (memo_get(self, obj) < 0)
            goto error;
        goto done;
    }

    if (type == &PyBytes_Type) {
        status = save_bytes(self, obj);
        goto done;
    }
    else if (type == &PyUnicode_Type) {
        status = save_unicode(self, obj);
        goto done;
    }
    else if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyType_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
            /* fall back to reduce */
            PyErr_Clear();
        }
        else {
            goto done;
        }
    }
    else if (type == &PyCFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * copyreg.dispatch_table, the object's __reduce_ex__ method,
     * or the object's __reduce__ method.
     */
    reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
    if (reduce_func != NULL) {
        /* Here, the reference count of the reduce_func object returned by
           PyDict_GetItem needs to be increased to be consistent with the one
           returned by PyObject_GetAttr. This allows us to blindly DECREF
           reduce_func at the end of the save() routine.
        */
        Py_INCREF(reduce_func);
        Py_INCREF(obj);
        reduce_value = _Pickler_FastCall(self, reduce_func, obj);
    }
    else {
        static PyObject *reduce_str = NULL;
        static PyObject *reduce_ex_str = NULL;

        /* Cache the name of the reduce methods.  Each name is checked on
           its own: if interning the second one fails once, a later call
           must retry it instead of using a NULL attribute name. */
        if (reduce_str == NULL) {
            reduce_str = PyUnicode_InternFromString("__reduce__");
            if (reduce_str == NULL)
                goto error;
        }
        if (reduce_ex_str == NULL) {
            reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
            if (reduce_ex_str == NULL)
                goto error;
        }

        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient, this
           make it impossible to know which method was actually called. Of
           course, this is not a big deal. But still, it would be nice to let
           the user know which method was called when something go
           wrong. Incidentally, this means if __reduce_ex__ is not defined, we
           don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* When proto is NULL, reduce_value stays NULL and the check
               below turns that into an error. */
            if (proto != NULL) {
                reduce_value = _Pickler_FastCall(self, reduce_func, proto);
            }
        }
        else {
            if (PyErr_ExceptionMatches(PyExc_AttributeError))
                PyErr_Clear();
            else
                goto error;
            /* Check for a __reduce__ method. */
            reduce_func = PyObject_GetAttr(obj, reduce_str);
            if (reduce_func != NULL) {
                reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
            }
            else {
                PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PyErr_SetString(PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    if (0) {
  error:
        status = -1;
    }
  done:
    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}

/* Write a complete pickle of obj: an optional PROTO header (protocol 2 and
 * above), the pickled object itself, and the terminating STOP opcode.
 *
 * Returns 0 on success and -1 on error.
 */
static int
dump(PicklerObject *self, PyObject *obj)
{
    const char stop_op = STOP;

    if (self->proto >= 2) {
        char header[2];

        /* The protocol number is stored in a single byte. */
        assert(self->proto >= 0 && self->proto < 256);
        header[0] = PROTO;
        header[1] = (unsigned char)self->proto;
        if (_Pickler_Write(self, header, 2) < 0)
            return -1;
    }

    if (save(self, obj, 0) < 0)
        return -1;
    if (_Pickler_Write(self, &stop_op, 1) < 0)
        return -1;

    return 0;
}

PyDoc_STRVAR(Pickler_clear_memo_doc,
"clear_memo() -> None. Clears the pickler's \"memo\"."
"\n"
"The memo is the data structure that remembers which objects the\n"
"pickler has already seen, so that shared or recursive objects are\n"
"pickled by reference and not by value.  This method is useful when\n"
"re-using picklers.");

/* Pickler.clear_memo(): empty the memo table if one exists, then return
 * None.  A pickler without a memo table is left untouched.
 */
static PyObject *
Pickler_clear_memo(PicklerObject *self)
{
    PyMemoTable *table = self->memo;

    if (table != NULL)
        PyMemoTable_Clear(table);

    Py_RETURN_NONE;
}

PyDoc_STRVAR(Pickler_dump_doc,
"dump(obj) -> None. Write a pickled representation of obj to the open file.");

/* Pickler.dump(obj): pickle obj into the output buffer and flush it to the
 * file.  Returns None, or NULL with an exception set on failure.
 */
static PyObject *
Pickler_dump(PicklerObject *self, PyObject *args)
{
    PyObject *obj;

    /* Check whether the Pickler was initialized correctly (issue3664).
       Developers often forget to call __init__() in their subclasses, which
       would trigger a segfault without this check. */
    if (self->write == NULL) {
        PyErr_Format(PicklingError,
                     "Pickler.__init__() was not called by %s.__init__()",
                     Py_TYPE(self)->tp_name);
        return NULL;
    }

    if (!PyArg_ParseTuple(args, "O:dump", &obj))
        return NULL;

    /* Reset the buffer, pickle into it, then hand it to the file; the
       first step that fails stops the sequence. */
    if (_Pickler_ClearBuffer(self) < 0 ||
        dump(self, obj) < 0 ||
        _Pickler_FlushToFile(self) < 0)
        return NULL;

    Py_RETURN_NONE;
}

/* Method table for Pickler objects: dump() takes positional arguments,
   clear_memo() takes none. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};

/* Destructor for Pickler objects: stop GC tracking, drop every owned
 * reference, delete the memo table and free the object's memory.
 */
static void
Pickler_dealloc(PicklerObject *self)
{
    PyTypeObject *tp = Py_TYPE(self);

    PyObject_GC_UnTrack(self);

    /* Owned object references; any of them may still be NULL. */
    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    /* The memo table has its own destructor instead of a reference count. */
    PyMemoTable_Del(self->memo);

    tp->tp_free((PyObject *)self);
}

/* GC traversal for Pickler objects: reports the write, pers_func, arg and
   fast_memo members to the visitor.  output_buffer and memo are not
   visited here.  Returns 0 unless a Py_VISIT returns early. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}

/* Release everything the Pickler owns and reset the corresponding members
 * to NULL.  Also used by Pickler_init() to wipe the state left by an
 * earlier __init__() call.  Always returns 0.
 */
static int
Pickler_clear(PicklerObject *self)
{
    PyMemoTable *old_memo;

    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    /* Detach the memo table from the object before destroying it. */
    old_memo = self->memo;
    if (old_memo != NULL) {
        self->memo = NULL;
        PyMemoTable_Del(old_memo);
    }

    return 0;
}

3359

3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377
/* Docstring describing the Pickler constructor.  The signature line lists
   all three arguments accepted by Pickler_init(): file, protocol and
   fix_imports. */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None, fix_imports=True)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3.  The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported.  The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n"
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python\n"
"2.x, so that the pickle data stream is readable with Python 2.x.\n");
3383 3384 3385 3386

/* Pickler.__init__(file, protocol=None, fix_imports=True).
 *
 * Parses the arguments, wipes any state left by a previous __init__()
 * call, configures the protocol and output stream, makes sure a memo table
 * and an output buffer exist, and looks up an optional persistent_id
 * attribute on the instance.
 *
 * Returns 0 on success and -1 on error.
 */
static int
Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
    PyObject *file;
    PyObject *proto_obj = NULL;
    PyObject *fix_imports = Py_True;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
                                     kwlist, &file, &proto_obj, &fix_imports))
        return -1;

    /* In case of multiple __init__() calls, clear previous content. */
    if (self->write != NULL)
        (void)Pickler_clear(self);

    if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0 ||
        _Pickler_SetOutputStream(self, file) < 0)
        return -1;

    /* memo and output_buffer may have already been created in _Pickler_New */
    if (self->memo == NULL) {
        self->memo = PyMemoTable_New();
        if (self->memo == NULL)
            return -1;
    }

    self->output_len = 0;
    if (self->output_buffer == NULL) {
        self->max_output_len = WRITE_BUF_SIZE;
        self->output_buffer = PyBytes_FromStringAndSize(NULL,
                                                        self->max_output_len);
        if (self->output_buffer == NULL)
            return -1;
    }

    self->arg = NULL;
    self->fast = 0;
    self->fast_nesting = 0;
    self->fast_memo = NULL;
    self->pers_func = NULL;

    /* Pick up a persistent_id attribute if the instance exposes one. */
    if (!PyObject_HasAttrString((PyObject *)self, "persistent_id"))
        return 0;

    self->pers_func = PyObject_GetAttrString((PyObject *)self,
                                             "persistent_id");
    if (self->pers_func == NULL)
        return -1;

    return 0;
}

/* Define a proxy object for the Pickler's internal memo object. This is to
 * avoid breaking code like:
 *  pickler.memo.clear()
 * and
 *  pickler.memo = saved_memo
 * Is this a good idea? Not really, but we don't want to break code that uses
 * it. Note that we don't implement the entire mapping API here. This is
 * intentional, as these should be treated as black-box implementation details.
 */

/* Instance layout of the memo proxy: the standard object header plus a
   pointer to the proxied pickler.  PicklerMemoProxy_New() takes a reference
   to the pickler; the proxy's dealloc and clear functions release it. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;

PyDoc_STRVAR(pmp_clear_doc,
"memo.clear() -> None.  Remove all items from memo.");

/* memo.clear(): empty the proxied pickler's memo table, if it has one,
 * and return None.
 */
static PyObject *
pmp_clear(PicklerMemoProxyObject *self)
{
    PyMemoTable *table = self->pickler->memo;

    if (table != NULL)
        PyMemoTable_Clear(table);

    Py_RETURN_NONE;
}

PyDoc_STRVAR(pmp_copy_doc,
"memo.copy() -> new_memo.  Copy the memo to a new object.");

/* memo.copy(): build a new dict snapshot of the proxied memo table.
 *
 * Each occupied table slot becomes one dict item whose key is the memoized
 * object's address as an int and whose value is the 2-tuple
 * (memo value, object).  A pickler that has no memo table yet yields an
 * empty dict, matching how pmp_clear() tolerates a NULL memo.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
static PyObject *
pmp_copy(PicklerMemoProxyObject *self)
{
    Py_ssize_t i;
    PyMemoTable *memo;
    PyObject *new_memo = PyDict_New();
    if (new_memo == NULL)
        return NULL;

    memo = self->pickler->memo;
    /* Guard against a missing table instead of dereferencing NULL. */
    if (memo == NULL)
        return new_memo;

    for (i = 0; i < memo->mt_allocated; ++i) {
        PyMemoEntry entry = memo->mt_table[i];
        if (entry.me_key != NULL) {
            int status;
            PyObject *key, *value;

            key = PyLong_FromVoidPtr(entry.me_key);
            value = Py_BuildValue("lO", entry.me_value, entry.me_key);

            if (key == NULL || value == NULL) {
                Py_XDECREF(key);
                Py_XDECREF(value);
                goto error;
            }
            status = PyDict_SetItem(new_memo, key, value);
            Py_DECREF(key);
            Py_DECREF(value);
            if (status < 0)
                goto error;
        }
    }
    return new_memo;

  error:
    Py_XDECREF(new_memo);
    return NULL;
}

PyDoc_STRVAR(pmp_reduce_doc,
"memo.__reduce__(). Pickling support.");

/* memo.__reduce__(): return the 2-tuple (dict, (contents,)), where contents
 * is the dict produced by pmp_copy().  The args parameter is not used.
 *
 * Returns a new reference, or NULL on failure.
 */
static PyObject *
pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
{
    PyObject *snapshot;
    PyObject *result = NULL;
    PyObject *ctor_args;

    snapshot = pmp_copy(self);
    if (snapshot == NULL)
        return NULL;

    result = PyTuple_New(2);
    if (result == NULL)
        goto error;

    ctor_args = PyTuple_New(1);
    if (ctor_args == NULL)
        goto error;

    /* PyTuple_SET_ITEM takes over the reference to each stored item. */
    PyTuple_SET_ITEM(ctor_args, 0, snapshot);
    Py_INCREF((PyObject *)&PyDict_Type);
    PyTuple_SET_ITEM(result, 0, (PyObject *)&PyDict_Type);
    PyTuple_SET_ITEM(result, 1, ctor_args);
    return result;

  error:
    Py_DECREF(snapshot);
    Py_XDECREF(result);
    return NULL;
}

/* Method table for the memo proxy.  Only clear(), copy() and __reduce__()
   are provided. */
static PyMethodDef picklerproxy_methods[] = {
    {"clear",      (PyCFunction)pmp_clear,  METH_NOARGS,  pmp_clear_doc},
    {"copy",       (PyCFunction)pmp_copy,   METH_NOARGS,  pmp_copy_doc},
    {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
    {NULL, NULL} /* sentinel */
};

/* Destructor for memo proxies: stop GC tracking, release the reference to
 * the proxied pickler (which may already be NULL) and free the proxy.
 */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject *proxy = (PyObject *)self;

    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del(proxy);
}
3545

3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557
/* GC traversal for memo proxies: the proxied pickler is the only member
   reported to the visitor.  Returns 0 unless Py_VISIT returns early. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}

/* Drop the proxy's reference to its pickler and reset the member to NULL.
 * Always returns 0.
 */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);

    return 0;
}

3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574
/* Type object for the memo proxy.  Instances are GC-tracked, refuse
 * hashing, use the generic attribute accessors and expose only the methods
 * listed in picklerproxy_methods.
 */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /* tp_name */
    sizeof(PicklerMemoProxyObject),             /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};

/* Create a GC-tracked memo proxy that holds a new reference to pickler.
 * Returns a new reference, or NULL if the allocation fails.
 */
static PyObject *
PicklerMemoProxy_New(PicklerObject *pickler)
{
    PicklerMemoProxyObject *proxy;

    proxy = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
    if (proxy != NULL) {
        Py_INCREF(pickler);
        proxy->pickler = pickler;
        /* Start tracking only once the member is fully initialised. */
        PyObject_GC_Track(proxy);
    }
    return (PyObject *)proxy;
}

/*****************************************************************************/

3608 3609 3610
/* Getter for Pickler.memo: every access returns a fresh proxy object that
 * wraps this pickler's memo table.
 */
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
    PyObject *proxy = PicklerMemoProxy_New(self);

    return proxy;
}

static int
3615
Pickler_set_memo(PicklerObject *self, PyObject *obj)
3616
{
3617
    PyMemoTable *new_memo = NULL;
3618

3619
    if (obj == NULL) {
3620 3621 3622 3623
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }
3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661

    if (Py_TYPE(obj) == &PicklerMemoProxyType) {
        PicklerObject *pickler =
            ((PicklerMemoProxyObject *)obj)->pickler;

        new_memo = PyMemoTable_Copy(pickler->memo);
        if (new_memo == NULL)
            return -1;
    }
    else if (PyDict_Check(obj)) {
        Py_ssize_t i = 0;
        PyObject *key, *value;

        new_memo = PyMemoTable_New();
        if (new_memo == NULL)
            return -1;

        while (PyDict_Next(obj, &i, &key, &value)) {
            long memo_id;
            PyObject *memo_obj;

            if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
                PyErr_SetString(PyExc_TypeError,
                                "'memo' values must be 2-item tuples");
                goto error;
            }
            memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
            if (memo_id == -1 && PyErr_Occurred())
                goto error;
            memo_obj = PyTuple_GET_ITEM(value, 1);
            if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
                goto error;
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "'memo' attribute must be an PicklerMemoProxy object"
                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
3662 3663 3664
        return -1;
    }

3665 3666
    PyMemoTable_Del(self->memo);
    self->memo = new_memo;
3667 3668

    return 0;
3669 3670 3671 3672 3673

  error:
    if (new_memo)
        PyMemoTable_Del(new_memo);
    return -1;
3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732
}

/* Getter for Pickler.persistent_id: returns a new reference to the stored
 * callable, or raises AttributeError when none is set.
 */
static PyObject *
Pickler_get_persid(PicklerObject *self)
{
    PyObject *func = self->pers_func;

    if (func == NULL) {
        PyErr_SetString(PyExc_AttributeError, "persistent_id");
        return NULL;
    }

    Py_INCREF(func);
    return func;
}

/* Setter for the Pickler's `persistent_id` attribute.

   `value` must be a callable; a NULL `value` (attribute deletion) is
   rejected.  Returns 0 on success, or -1 with a TypeError set. */
static int
Pickler_set_persid(PicklerObject *self, PyObject *value)
{
    PyObject *previous;

    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }
    if (!PyCallable_Check(value)) {
        PyErr_SetString(PyExc_TypeError,
                        "persistent_id must be a callable taking one argument");
        return -1;
    }

    /* Install the new callable before dropping the old one, so that
       self->pers_func never refers to an object being released.  The old
       value may be NULL, hence Py_XDECREF. */
    Py_INCREF(value);
    previous = self->pers_func;
    self->pers_func = value;
    Py_XDECREF(previous);

    return 0;
}

/* Plain struct members of PicklerObject exposed as attributes.  Each entry
   is {name, type, offset, flags, doc}; the table ends with a NULL-named
   sentinel. */
static PyMemberDef Pickler_members[] = {
    {"bin",  T_INT, offsetof(PicklerObject, bin),  0, NULL},
    {"fast", T_INT, offsetof(PicklerObject, fast), 0, NULL},
    {NULL}  /* sentinel */
};

/* Computed attributes of Pickler objects.  Each entry is
   {name, getter, setter, doc, closure}; the table ends with a NULL-named
   sentinel. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",
     (getter)Pickler_get_memo, (setter)Pickler_set_memo,
     NULL, NULL},
    {"persistent_id",
     (getter)Pickler_get_persid, (setter)Pickler_set_persid,
     NULL, NULL},
    {NULL}  /* sentinel */
};

/* Type object for _pickle.Pickler.  The type is subclassable and takes part
   in cyclic garbage collection (see the flags, tp_traverse and tp_clear). */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler",                  /* tp_name */
    sizeof(PicklerObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)Pickler_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                        /* tp_flags */
    Pickler_doc,                        /* tp_doc */
    (traverseproc)Pickler_traverse,     /* tp_traverse */
    (inquiry)Pickler_clear,             /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    Pickler_methods,                    /* tp_methods */
    Pickler_members,                    /* tp_members */
    Pickler_getsets,                    /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)Pickler_init,             /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_GC_Del,                    /* tp_free */
    0,                                  /* tp_is_gc */
};

/* Temporary helper that invokes self.find_class(module_name, global_name)
   through the generic method-call machinery and returns its result
   (NULL on failure).

   XXX: It would be nice to be able to avoid the Python function call
   overhead by calling the C version of find_class() directly when
   find_class() is not overridden by a subclass.  That could become rather
   hackish, though.  A simpler optimization would be to call the C function
   whenever self is not a subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    PyObject *receiver = (PyObject *)self;
    PyObject *found;

    found = PyObject_CallMethod(receiver, "find_class", "OO",
                                module_name, global_name);
    return found;
}

/* Pop the most recent entry off the mark stack and return it.

   Sets UnpicklingError and returns -1 when there is no mark to pop. */
static int
marker(UnpicklerObject *self)
{
    if (self->num_marks >= 1) {
        self->num_marks -= 1;
        return self->marks[self->num_marks];
    }

    PyErr_SetString(UnpicklingError, "could not find MARK");
    return -1;
}

/* NONE opcode: append Py_None to the unpickler's stack.
   Returns 0 on success; PDATA_APPEND is given -1 as its failure value. */
static int
load_none(UnpicklerObject *self)
{
    PyObject *none = Py_None;

    PDATA_APPEND(self->stack, none, -1);
    return 0;
}

/* Report a line that is too short to be a valid opcode argument: sets
   UnpicklingError and returns -1 so callers can `return bad_readline();`. */
static int
bad_readline(void)
{
    const int failure = -1;

    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return failure;
}

/* INT opcode: read one text line and push the integer (or bool) it encodes.

   The line is first parsed with strtol().  If that overflows or leaves
   unparsed characters, the line is re-parsed as an arbitrary-precision
   Python int.  Returns 0 on success, -1 with an exception set on failure. */
static int
load_int(UnpicklerObject *self)
{
    PyObject *value;
    char *line, *stop;
    Py_ssize_t len;
    long number;
    int fits_in_long;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    /* XXX: Should the base argument of strtol() be explicitly set to 10?
       XXX(avassalotti): Should this use PyOS_strtol()? */
    errno = 0;
    number = strtol(line, &stop, 0);
    fits_in_long = (errno == 0) && (*stop == '\n' || *stop == '\0');

    if (fits_in_long) {
        /* A 3-byte line whose value is 0 or 1 is pushed as a bool rather
           than an int. */
        if (len == 3 && (number == 0 || number == 1))
            value = PyBool_FromLong(number);
        else
            value = PyLong_FromLong(number);
        if (value == NULL)
            return -1;
    }
    else {
        /* Hm, maybe we've got something long.  Try reading it as a Python
           long object.
           XXX: Same question about the base here. */
        errno = 0;
        value = PyLong_FromString(line, NULL, 0);
        if (value == NULL) {
            PyErr_SetString(PyExc_ValueError,
                            "could not convert string to int");
            return -1;
        }
    }

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* Append the given boolean singleton to the unpickler's stack.

   `boolean` must be Py_True or Py_False (checked by assertion only).
   Returns 0 on success; PDATA_APPEND is given -1 as its failure value. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);

    PDATA_APPEND(self->stack, boolean, -1);

    return 0;
}

/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that placing the top byte
 * never left-shifts a set bit into the sign bit of a signed long, which is
 * undefined behaviour when long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  Unsigned negation
     * is well defined and yields all-ones above bit 31 when it is set.
     */
    if (sizeof(long) > 4 && size == 4) {
        x |= -(x & (1UL << 31));
    }

    return (long)x;
}

/* Shared tail of the BININT* opcodes: decode the `size` little-endian bytes
   at `s` with calc_binint() and push the result as a Python int.
   Returns 0 on success, -1 on failure. */
static int
load_binintx(UnpicklerObject *self, char *s, int size)
{
    PyObject *value = PyLong_FromLong(calc_binint(s, size));

    if (value == NULL)
        return -1;

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* BININT opcode: read a 4-byte integer argument and push it.
   Returns 0 on success, -1 on failure. */
static int
load_binint(UnpicklerObject *self)
{
    enum { NBYTES = 4 };
    char *payload;

    if (_Unpickler_Read(self, &payload, NBYTES) >= 0)
        return load_binintx(self, payload, NBYTES);

    return -1;
}

/* BININT1 opcode: read a 1-byte integer argument and push it.
   Returns 0 on success, -1 on failure. */
static int
load_binint1(UnpicklerObject *self)
{
    enum { NBYTES = 1 };
    char *payload;

    if (_Unpickler_Read(self, &payload, NBYTES) >= 0)
        return load_binintx(self, payload, NBYTES);

    return -1;
}

/* BININT2 opcode: read a 2-byte integer argument and push it.
   Returns 0 on success, -1 on failure. */
static int
load_binint2(UnpicklerObject *self)
{
    enum { NBYTES = 2 };
    char *payload;

    if (_Unpickler_Read(self, &payload, NBYTES) >= 0)
        return load_binintx(self, payload, NBYTES);

    return -1;
}

/* LONG opcode: read one text line and push the Python int it encodes.
   Returns 0 on success, -1 with an exception set on failure. */
static int
load_long(UnpicklerObject *self)
{
    PyObject *value;
    char *line;
    char *suffix;
    Py_ssize_t len;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    /* line[len-2] will usually be 'L' (and line[len-1] is '\n'); the 'L'
       has to go before PyLong_FromString sees the text.  To stay compatible
       with Python 3.0.0 the 'L' is not actually *required*.  Note that the
       line buffer is modified in place. */
    suffix = &line[len - 2];
    if (*suffix == 'L')
        *suffix = '\0';

    /* XXX: Should the base argument explicitly set to 10? */
    value = PyLong_FromString(line, NULL, 0);
    if (value == NULL)
        return -1;

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* The next 'size' bytes (1 or 4) give the number of bytes of little-endian
 * 256's-complement data that follow; that data is converted to a Python int
 * and pushed.  Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_counted_long(UnpicklerObject *self, int size)
{
    PyObject *value;
    char *count_bytes;
    char *digits;

    assert(size == 1 || size == 4);
    if (_Unpickler_Read(self, &count_bytes, size) < 0)
        return -1;

    /* From here on `size` is the payload length, not the prefix width. */
    size = calc_binint(count_bytes, size);
    if (size < 0) {
        /* Corrupt or hostile pickle -- we never write one like this */
        PyErr_SetString(UnpicklingError,
                        "LONG pickle has negative byte count");
        return -1;
    }

    if (size > 0) {
        /* Read the raw little-endian bytes and convert. */
        if (_Unpickler_Read(self, &digits, size) < 0)
            return -1;
        value = _PyLong_FromByteArray((unsigned char *)digits, (size_t)size,
                                      1 /* little endian */,
                                      1 /* signed */);
    }
    else {
        /* An empty payload stands for zero. */
        value = PyLong_FromLong(0L);
    }

    if (value == NULL)
        return -1;

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* FLOAT opcode: read one text line, parse it as a double and push the
   resulting Python float.  Returns 0 on success, -1 with an exception set
   on failure. */
static int
load_float(UnpicklerObject *self)
{
    PyObject *value;
    char *line, *stop;
    Py_ssize_t len;
    double parsed;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    errno = 0;
    parsed = PyOS_string_to_double(line, &stop, PyExc_OverflowError);
    if (parsed == -1.0 && PyErr_Occurred())
        return -1;

    /* The number must be followed directly by the end of the line. */
    if (!(stop[0] == '\n' || stop[0] == '\0')) {
        PyErr_SetString(PyExc_ValueError, "could not convert string to float");
        return -1;
    }

    value = PyFloat_FromDouble(parsed);
    if (value == NULL)
        return -1;

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* BINFLOAT opcode: read 8 bytes, unpack them as a double with
   _PyFloat_Unpack8() (second argument 0) and push the resulting Python
   float.  Returns 0 on success, -1 on failure. */
static int
load_binfloat(UnpicklerObject *self)
{
    PyObject *value;
    double unpacked;
    char *raw;

    if (_Unpickler_Read(self, &raw, 8) < 0)
        return -1;

    unpacked = _PyFloat_Unpack8((unsigned char *)raw, 0);
    /* -1.0 is also a legitimate value, so consult the error indicator. */
    if (unpacked == -1.0 && PyErr_Occurred())
        return -1;

    value = PyFloat_FromDouble(unpacked);
    if (value == NULL)
        return -1;

    PDATA_PUSH(self->stack, value, -1);
    return 0;
}

/* STRING opcode: read one text line holding a quoted, escaped string
   literal, decode the escapes, and push the result as a str decoded with
   self->encoding / self->errors.

   Returns 0 on success, -1 with an exception set on failure.  A line that
   is not wrapped in a matching pair of quotes raises ValueError. */
static int
load_string(UnpicklerObject *self)
{
    PyObject *bytes;
    PyObject *str = NULL;
    Py_ssize_t len;
    char *line, *s, *p;

    if ((len = _Unpickler_Readline(self, &line)) < 0)
        return -1;
    if (len < 3)
        return bad_readline();

    /* Work on a private copy, since the closing quote is overwritten below.
       Copy by length rather than with strdup(): the line may contain an
       embedded NUL, in which case strdup() would yield a buffer shorter
       than `len` and the indexing below would run past its end. */
    s = malloc((size_t)len + 1);
    if (s == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    memcpy(s, line, (size_t)len);
    s[len] = '\0';

    /* Strip trailing whitespace/control bytes.  The `len > 0` bound keeps
       an all-whitespace line from reading before the start of the buffer. */
    while (len > 0 && s[len - 1] <= ' ')
        len--;

    /* Strip outermost quotes.  Requiring len >= 2 ensures the opening and
       closing quote are two distinct characters; otherwise a lone quote
       would match itself and produce a negative length. */
    if (len >= 2 && s[0] == s[len - 1] && (s[0] == '"' || s[0] == '\'')) {
        s[len - 1] = '\0';
        p = s + 1;
        len -= 2;
    }
    else {
        free(s);
        PyErr_SetString(PyExc_ValueError, "insecure string pickle");
        return -1;
    }

    /* Use the PyBytes API to decode the string, since that is what is used
       to encode, and then coerce the result to Unicode. */
    bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
    free(s);
    if (bytes == NULL)
        return -1;
    str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
    Py_DECREF(bytes);
    if (str == NULL)
        return -1;

    PDATA_PUSH(self->stack, str, -1);
    return 0;
}

/* BINBYTES opcode: read a 4-byte length followed by that many bytes, and
   push them as a bytes object.  A negative length raises UnpicklingError.
   Returns 0 on success, -1 on failure. */
static int
load_binbytes(UnpicklerObject *self)
{
    PyObject *result;
    long nbytes;
    char *buf;

    if (_Unpickler_Read(self, &buf, 4) < 0)
        return -1;

    nbytes = calc_binint(buf, 4);
    if (nbytes < 0) {
        PyErr_SetString(UnpicklingError, 
                        "BINBYTES pickle has negative byte count");
        return -1;
    }

    /* `buf` is re-pointed at the payload by the second read. */
    if (_Unpickler_Read(self, &buf, nbytes) < 0)
        return -1;

    result = PyBytes_FromStringAndSize(buf, nbytes);
    if (result == NULL)
        return -1;

    PDATA_PUSH(self->stack, result, -1);
    return 0;
}

/* SHORT_BINBYTES opcode: read a 1-byte unsigned length followed by that
   many bytes, and push them as a bytes object.
   Returns 0 on success, -1 on failure. */
static int
load_short_binbytes(UnpicklerObject *self)
{
    PyObject *result;
    unsigned char nbytes;
    char *buf;

    if (_Unpickler_Read(self, &buf, 1) < 0)
        return -1;
    nbytes = (unsigned char)buf[0];

    /* `buf` is re-pointed at the payload by the second read. */
    if (_Unpickler_Read(self, &buf, nbytes) < 0)
        return -1;

    result = PyBytes_FromStringAndSize(buf, nbytes);
    if (result == NULL)
        return -1;

    PDATA_PUSH(self->stack, result, -1);
    return 0;
}

/* BINSTRING opcode: read a 4-byte length followed by that many bytes, decode
   them with self->encoding / self->errors and push the resulting str.
   A negative length raises UnpicklingError.
   Returns 0 on success, -1 on failure. */
static int
load_binstring(UnpicklerObject *self)
{
    PyObject *text;
    long nbytes;
    char *buf;

    if (_Unpickler_Read(self, &buf, 4) < 0)
        return -1;

    nbytes = calc_binint(buf, 4);
    if (nbytes < 0) {
        PyErr_SetString(UnpicklingError, 
                        "BINSTRING pickle has negative byte count");
        return -1;
    }

    /* `buf` is re-pointed at the payload by the second read. */
    if (_Unpickler_Read(self, &buf, nbytes) < 0)
        return -1;

    /* Convert Python 2.x strings to unicode. */
    text = PyUnicode_Decode(buf, nbytes, self->encoding, self->errors);
    if (text == NULL)
        return -1;

    PDATA_PUSH(self->stack, text, -1);
    return 0;
}

/* SHORT_BINSTRING opcode: read a 1-byte unsigned length followed by that
   many bytes, decode them with self->encoding / self->errors and push the
   resulting str.  Returns 0 on success, -1 on failure. */
static int
load_short_binstring(UnpicklerObject *self)
{
    PyObject *text;
    unsigned char nbytes;
    char *buf;

    if (_Unpickler_Read(self, &buf, 1) < 0)
        return -1;
    nbytes = (unsigned char)buf[0];

    /* `buf` is re-pointed at the payload by the second read. */
    if (_Unpickler_Read(self, &buf, nbytes) < 0)
        return -1;

    /* Convert Python 2.x strings to unicode. */
    text = PyUnicode_Decode(buf, nbytes, self->encoding, self->errors);
    if (text == NULL)
        return -1;

    PDATA_PUSH(self->stack, text, -1);
    return 0;
}

/* UNICODE opcode: read one text line and push it as a str, decoding all but
   the final byte of the line as raw-unicode-escape.
   Returns 0 on success, -1 on failure. */
static int
load_unicode(UnpicklerObject *self)
{
    PyObject *text;
    Py_ssize_t len;
    char *line;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 1)
        return bad_readline();

    text = PyUnicode_DecodeRawUnicodeEscape(line, len - 1, NULL);
    if (text == NULL)
        return -1;

    PDATA_PUSH(self->stack, text, -1);
    return 0;
}

/* BINUNICODE opcode: read a 4-byte length followed by that many bytes of
   UTF-8 (decoded with the "surrogatepass" error handler) and push the
   resulting str.  A negative length raises UnpicklingError.
   Returns 0 on success, -1 on failure. */
static int
load_binunicode(UnpicklerObject *self)
{
    PyObject *text;
    long nbytes;
    char *buf;

    if (_Unpickler_Read(self, &buf, 4) < 0)
        return -1;

    nbytes = calc_binint(buf, 4);
    if (nbytes < 0) {
        PyErr_SetString(UnpicklingError, 
                        "BINUNICODE pickle has negative byte count");
        return -1;
    }

    /* `buf` is re-pointed at the payload by the second read. */
    if (_Unpickler_Read(self, &buf, nbytes) < 0)
        return -1;

    text = PyUnicode_DecodeUTF8(buf, nbytes, "surrogatepass");
    if (text == NULL)
        return -1;

    PDATA_PUSH(self->stack, text, -1);
    return 0;
}

/* TUPLE opcode: pop the topmost mark, collect the stack items from that
   position into a tuple via Pdata_poptuple(), and push the tuple.
   Returns 0 on success, -1 on failure. */
static int
load_tuple(UnpicklerObject *self)
{
    PyObject *tuple;
    int mark = marker(self);

    if (mark < 0)
        return -1;

    tuple = Pdata_poptuple(self->stack, mark);
    if (tuple == NULL)
        return -1;

    PDATA_PUSH(self->stack, tuple, -1);
    return 0;
}

/* Build a tuple from the top `len` items of the stack and push it.

   Items are popped one at a time and stored from the last slot backwards,
   so the tuple keeps the order the items had on the stack.
   Returns 0 on success, -1 on failure. */
static int
load_counted_tuple(UnpicklerObject *self, int len)
{
    PyObject *tuple;

    tuple = PyTuple_New(len);
    if (tuple == NULL)
        return -1;

    while (--len >= 0) {
        PyObject *item;

        PDATA_POP(self->stack, item);
        if (item == NULL) {
            /* Release the partially filled tuple instead of leaking it
               (and the items already stored in it). */
            Py_DECREF(tuple);
            return -1;
        }
        PyTuple_SET_ITEM(tuple, len, item);
    }
    PDATA_PUSH(self->stack, tuple, -1);
    return 0;
}

/* EMPTY_LIST opcode: push a new empty list.
   Returns 0 on success, -1 on failure. */
static int
load_empty_list(UnpicklerObject *self)
{
    PyObject *list = PyList_New(0);

    if (list == NULL)
        return -1;

    PDATA_PUSH(self->stack, list, -1);
    return 0;
}

/* EMPTY_DICT opcode: push a new empty dict.
   Returns 0 on success, -1 on failure. */
static int
load_empty_dict(UnpicklerObject *self)
{
    PyObject *dict = PyDict_New();

    if (dict == NULL)
        return -1;

    PDATA_PUSH(self->stack, dict, -1);
    return 0;
}

/* LIST opcode: pop the topmost mark, collect the stack items from that
   position into a list via Pdata_poplist(), and push the list.
   Returns 0 on success, -1 on failure. */
static int
load_list(UnpicklerObject *self)
{
    PyObject *list;
    int mark = marker(self);

    if (mark < 0)
        return -1;

    list = Pdata_poplist(self->stack, mark);
    if (list == NULL)
        return -1;

    PDATA_PUSH(self->stack, list, -1);
    return 0;
}

/* DICT opcode: pop the topmost mark, treat the stack items above it as
   alternating key, value entries, build a dict from them, then clear those
   items from the stack and push the dict.

   A trailing key without a value is not inserted.  Returns 0 on success,
   -1 on failure. */
static int
load_dict(UnpicklerObject *self)
{
    PyObject *dict;
    int mark, top, pos;

    mark = marker(self);
    if (mark < 0)
        return -1;
    top = Py_SIZE(self->stack);

    dict = PyDict_New();
    if (dict == NULL)
        return -1;

    /* pos indexes the key; pos + 1 indexes its value. */
    for (pos = mark; pos + 1 < top; pos += 2) {
        PyObject *key = self->stack->data[pos];
        PyObject *value = self->stack->data[pos + 1];

        if (PyDict_SetItem(dict, key, value) < 0) {
            Py_DECREF(dict);
            return -1;
        }
    }

    Pdata_clear(self->stack, mark);
    PDATA_PUSH(self->stack, dict, -1);
    return 0;
}

/* Create an instance of `cls` for the INST and OBJ opcodes.

   The caller must ensure `args` is a tuple.  Normally it comes from
   Pdata_poptuple, which packs objects from the top of the stack into a
   newly created tuple.

   `cls` is called with `args` unless all of the following hold: `args` is
   empty, `cls` is a type object, and `cls` has no __getinitargs__
   attribute.  In that case cls.__new__(cls) is called instead.
   Returns the new object, or NULL on failure. */
static PyObject *
instantiate(PyObject *cls, PyObject *args)
{
    int use_bare_new;

    assert(PyTuple_Check(args));

    use_bare_new = Py_SIZE(args) <= 0 &&
                   PyType_Check(cls) &&
                   !PyObject_HasAttrString(cls, "__getinitargs__");

    if (use_bare_new)
        return PyObject_CallMethod(cls, "__new__", "O", cls);

    return PyObject_CallObject(cls, args);
}

/* OBJ opcode: the stack items above the topmost mark are a class followed
   by its constructor arguments.  The arguments are packed into a tuple, the
   class is popped, and the instance built by instantiate() is pushed.
   Returns 0 on success, -1 on failure. */
static int
load_obj(UnpicklerObject *self)
{
    PyObject *cls, *args, *obj;
    int mark;

    mark = marker(self);
    if (mark < 0)
        return -1;

    /* Everything after the class (which sits at the mark position). */
    args = Pdata_poptuple(self->stack, mark + 1);
    if (args == NULL)
        return -1;

    PDATA_POP(self->stack, cls);
    if (cls == NULL) {
        Py_DECREF(args);
        return -1;
    }

    obj = instantiate(cls, args);
    Py_DECREF(cls);
    Py_DECREF(args);
    if (obj == NULL)
        return -1;

    PDATA_PUSH(self->stack, obj, -1);
    return 0;
}

/* INST opcode: read a module-name line and a class-name line, resolve the
   class through find_class(), pack the stack items above the topmost mark
   into an argument tuple, and push the instance built by instantiate().
   Returns 0 on success, -1 with an exception set on failure. */
static int
load_inst(UnpicklerObject *self)
{
    PyObject *cls = NULL;
    PyObject *args = NULL;
    PyObject *obj = NULL;
    PyObject *module_name;
    PyObject *class_name;
    Py_ssize_t len;
    int i;
    char *s;

    if ((i = marker(self)) < 0)
        return -1;
    if ((len = _Unpickler_Readline(self, &s)) < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
       identifiers are permitted in Python 3.0, since the INST opcode is only
       supported by older protocols on Python 2.x. */
    module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
    if (module_name == NULL)
        return -1;

    if ((len = _Unpickler_Readline(self, &s)) >= 0) {
        if (len < 2) {
            /* Release module_name before bailing out; returning directly
               here would leak it. */
            Py_DECREF(module_name);
            return bad_readline();
        }
        class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
        if (class_name != NULL) {
            cls = find_class(self, module_name, class_name);
            Py_DECREF(class_name);
        }
    }
    Py_DECREF(module_name);

    /* cls is still NULL if the readline, the decoding or the lookup
       failed. */
    if (cls == NULL)
        return -1;

    if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
        obj = instantiate(cls, args);
        Py_DECREF(args);
    }
    Py_DECREF(cls);

    if (obj == NULL)
        return -1;

    PDATA_PUSH(self->stack, obj, -1);
    return 0;
}

/* NEWOBJ opcode.  The stack is ... cls argtuple; both are popped and the
 * result of cls.__new__(cls, *argtuple) is pushed.
 *
 * Raises UnpicklingError when the argument is not a tuple, when the class is
 * not a type object, or when the type has no tp_new slot.
 * Returns 0 on success, -1 on failure.
 */
static int
load_newobj(UnpicklerObject *self)
{
    PyObject *argtuple = NULL;
    PyObject *clsraw = NULL;
    PyTypeObject *type;         /* clsraw viewed as a type object */
    PyObject *instance;

    PDATA_POP(self->stack, argtuple);
    if (argtuple == NULL)
        goto error;
    if (!PyTuple_Check(argtuple)) {
        PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg tuple.");
        goto error;
    }

    PDATA_POP(self->stack, clsraw);
    if (clsraw == NULL)
        goto error;
    if (!PyType_Check(clsraw)) {
        PyErr_SetString(UnpicklingError,
                        "NEWOBJ class argument isn't a type object");
        goto error;
    }
    type = (PyTypeObject *)clsraw;
    if (type->tp_new == NULL) {
        PyErr_SetString(UnpicklingError,
                        "NEWOBJ class argument has NULL tp_new");
        goto error;
    }

    /* Call __new__ through the type slot, with no keyword arguments. */
    instance = type->tp_new(type, argtuple, NULL);
    if (instance == NULL)
        goto error;

    Py_DECREF(argtuple);
    Py_DECREF(clsraw);
    PDATA_PUSH(self->stack, instance, -1);
    return 0;

  error:
    Py_XDECREF(argtuple);
    Py_XDECREF(clsraw);
    return -1;
}

/* GLOBAL opcode: read a module-name line and a global-name line (each
   decoded as UTF-8 without its final byte), resolve the pair through
   find_class(), and push the result.
   Returns 0 on success, -1 with an exception set on failure. */
static int
load_global(UnpicklerObject *self)
{
    PyObject *global;
    PyObject *module_name;
    PyObject *global_name;
    Py_ssize_t len;
    char *line;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();
    module_name = PyUnicode_DecodeUTF8(line, len - 1, "strict");
    if (module_name == NULL)
        return -1;

    /* From here on every exit path must release module_name. */
    len = _Unpickler_Readline(self, &line);
    if (len < 0) {
        Py_DECREF(module_name);
        return -1;
    }
    if (len < 2) {
        Py_DECREF(module_name);
        return bad_readline();
    }
    global_name = PyUnicode_DecodeUTF8(line, len - 1, "strict");
    if (global_name == NULL) {
        Py_DECREF(module_name);
        return -1;
    }

    global = find_class(self, module_name, global_name);
    Py_DECREF(global_name);
    Py_DECREF(module_name);
    if (global == NULL)
        return -1;

    PDATA_PUSH(self->stack, global, -1);
    return 0;
}

/* PERSID opcode: read one text line, pass it (minus its final byte, as a
   bytes object) to self->pers_func, and push whatever that returns.

   Raises UnpicklingError when no pers_func is set.
   Returns 0 on success, -1 on failure. */
static int
load_persid(UnpicklerObject *self)
{
    PyObject *pid;
    Py_ssize_t len;
    char *line;

    if (self->pers_func == NULL) {
        PyErr_SetString(UnpicklingError,
                        "A load persistent id instruction was encountered,\n"
                        "but no persistent_load function was specified.");
        return -1;
    }

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    pid = PyBytes_FromStringAndSize(line, len - 1);
    if (pid == NULL)
        return -1;

    /* Ugh... reusing `pid` for the result does not leak, since
       _Unpickler_FastCall() steals the reference to pid first. */
    pid = _Unpickler_FastCall(self, self->pers_func, pid);
    if (pid == NULL)
        return -1;

    PDATA_PUSH(self->stack, pid, -1);
    return 0;
}

/* BINPERSID opcode: pop the persistent id from the stack, pass it to
   self->pers_func, and push whatever that returns.

   Raises UnpicklingError when no pers_func is set.
   Returns 0 on success, -1 on failure. */
static int
load_binpersid(UnpicklerObject *self)
{
    PyObject *pid;

    if (self->pers_func == NULL) {
        PyErr_SetString(UnpicklingError,
                        "A load persistent id instruction was encountered,\n"
                        "but no persistent_load function was specified.");
        return -1;
    }

    PDATA_POP(self->stack, pid);
    if (pid == NULL)
        return -1;

    /* Ugh... reusing `pid` for the result does not leak, since
       _Unpickler_FastCall() steals the reference to pid first. */
    pid = _Unpickler_FastCall(self, self->pers_func, pid);
    if (pid == NULL)
        return -1;

    PDATA_PUSH(self->stack, pid, -1);
    return 0;
}

/* POP opcode: discard the topmost entry.

   Note that the single stack of pickle.py is split here into two stacks,
   an object stack and a mark stack, so the right one has to be popped.
   The mark stack is examined first: if its top mark equals the current
   object-stack size, that mark is what gets popped.  Otherwise the top
   object is released.  A stack underflow is signalled only when neither
   applies, i.e. the object stack is empty and no mark matches.
   Returns 0 on success, or the result of stack_underflow(). */
static int
load_pop(UnpicklerObject *self)
{
    int depth = Py_SIZE(self->stack);
    int mark_on_top =
        self->num_marks > 0 && self->marks[self->num_marks - 1] == depth;

    if (mark_on_top) {
        self->num_marks--;
        return 0;
    }
    if (depth <= 0)
        return stack_underflow();

    depth--;
    Py_DECREF(self->stack->data[depth]);
    Py_SIZE(self->stack) = depth;
    return 0;
}

/* POP_MARK opcode: pop the topmost mark and clear the object stack down to
   that position with Pdata_clear().
   Returns 0 on success, -1 when there is no mark. */
static int
load_pop_mark(UnpicklerObject *self)
{
    int mark = marker(self);

    if (mark < 0)
        return -1;

    Pdata_clear(self->stack, mark);
    return 0;
}

/* DUP opcode: append the object currently on top of the stack a second
   time.  Returns 0 on success, or the result of stack_underflow() when the
   stack is empty. */
static int
load_dup(UnpicklerObject *self)
{
    PyObject *top;
    int depth = Py_SIZE(self->stack);

    if (depth <= 0)
        return stack_underflow();

    top = self->stack->data[depth - 1];
    PDATA_APPEND(self->stack, top, -1);
    return 0;
}

/* GET opcode: read a decimal memo index from one text line, look it up with
   _Unpickler_MemoGet() and append the result to the stack.

   When the lookup yields NULL without setting an error, KeyError is raised
   with the index as its argument.  Returns 0 on success, -1 on failure. */
static int
load_get(UnpicklerObject *self)
{
    PyObject *key, *value;
    Py_ssize_t idx;
    Py_ssize_t len;
    char *line;

    len = _Unpickler_Readline(self, &line);
    if (len < 0)
        return -1;
    if (len < 2)
        return bad_readline();

    key = PyLong_FromString(line, NULL, 10);
    if (key == NULL)
        return -1;

    idx = PyLong_AsSsize_t(key);
    if (idx == -1 && PyErr_Occurred())
        goto error;

    value = _Unpickler_MemoGet(self, idx);
    if (value == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetObject(PyExc_KeyError, key);
        goto error;
    }
    Py_DECREF(key);

    PDATA_APPEND(self->stack, value, -1);
    return 0;

  error:
    Py_DECREF(key);
    return -1;
}

/* BINGET opcode: read a 1-byte memo index, look it up with
   _Unpickler_MemoGet() and append the result to the stack.

   When the lookup yields NULL without setting an error, KeyError is raised
   with the index as its argument.  Returns 0 on success, -1 on failure. */
static int
load_binget(UnpicklerObject *self)
{
    PyObject *value;
    Py_ssize_t idx;
    char *s;

    if (_Unpickler_Read(self, &s, 1) < 0)
        return -1;

    idx = Py_CHARMASK(s[0]);

    value = _Unpickler_MemoGet(self, idx);
    if (value == NULL) {
        PyObject *key = PyLong_FromSsize_t(idx);
        /* key is NULL if the int could not be created; in that case its
           error is already set and there is nothing to release. */
        if (key != NULL) {
            if (!PyErr_Occurred())
                PyErr_SetObject(PyExc_KeyError, key);
            Py_DECREF(key);
        }
        return -1;
    }

    PDATA_APPEND(self->stack, value, -1);
    return 0;
}

/* LONG_BINGET opcode: read a 4-byte little-endian memo index, look it up
   with _Unpickler_MemoGet() and append the result to the stack.

   When the lookup yields NULL without setting an error, KeyError is raised
   with the index as its argument.  Returns 0 on success, -1 on failure. */
static int
load_long_binget(UnpicklerObject *self)
{
    PyObject *value;
    Py_ssize_t idx;
    char *s;

    if (_Unpickler_Read(self, &s, 4) < 0)
        return -1;

    /* Assemble the index from its four bytes, least significant first. */
    idx =  (long)Py_CHARMASK(s[0]);
    idx |= (long)Py_CHARMASK(s[1]) << 8;
    idx |= (long)Py_CHARMASK(s[2]) << 16;
    idx |= (long)Py_CHARMASK(s[3]) << 24;

    value = _Unpickler_MemoGet(self, idx);
    if (value == NULL) {
        PyObject *key = PyLong_FromSsize_t(idx);
        /* key is NULL if the int could not be created; in that case its
           error is already set and there is nothing to release. */
        if (key != NULL) {
            if (!PyErr_Occurred())
                PyErr_SetObject(PyExc_KeyError, key);
            Py_DECREF(key);
        }
        return -1;
    }

    PDATA_APPEND(self->stack, value, -1);
    return 0;
}

/* Push an object from the extension registry (EXT[124]).  nbytes is
 * the number of bytes following the opcode, holding the index (code) value.
 *
 * The code is looked up in extension_cache first.  On a miss, the
 * (module_name, class_name) pair registered for the code in
 * inverted_registry is resolved with find_class() and the result is stored
 * in the cache.  Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_extension(UnpicklerObject *self, int nbytes)
{
    char *codebytes;            /* the nbytes bytes after the opcode */
    long code;                  /* calc_binint returns long */
    PyObject *py_code;          /* code as a Python int */
    PyObject *obj;              /* the object to push */
    PyObject *pair;             /* (module_name, class_name) */
    PyObject *module_name = NULL;
    PyObject *class_name = NULL;
    int well_formed;
    int status;

    assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
    if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
        return -1;

    code = calc_binint(codebytes, nbytes);
    if (code <= 0) {            /* note that 0 is forbidden */
        /* Corrupt or hostile pickle. */
        PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
        return -1;
    }

    py_code = PyLong_FromLong(code);
    if (py_code == NULL)
        return -1;

    /* Look for the code in the cache. */
    obj = PyDict_GetItem(extension_cache, py_code);
    if (obj != NULL) {
        /* Bingo. */
        Py_DECREF(py_code);
        PDATA_APPEND(self->stack, obj, -1);
        return 0;
    }

    /* Look up the (module_name, class_name) pair. */
    pair = PyDict_GetItem(inverted_registry, py_code);
    if (pair == NULL) {
        Py_DECREF(py_code);
        PyErr_Format(PyExc_ValueError,
                     "unregistered extension code %ld", code);
        return -1;
    }

    /* Since the extension registry is manipulable via Python code,
     * confirm that pair is really a 2-tuple of strings.
     */
    well_formed = PyTuple_Check(pair) && PyTuple_Size(pair) == 2;
    if (well_formed) {
        module_name = PyTuple_GET_ITEM(pair, 0);
        class_name = PyTuple_GET_ITEM(pair, 1);
        well_formed = PyUnicode_Check(module_name) &&
                      PyUnicode_Check(class_name);
    }
    if (!well_formed) {
        Py_DECREF(py_code);
        PyErr_Format(PyExc_ValueError,
                     "_inverted_registry[%ld] isn't a 2-tuple of strings",
                     code);
        return -1;
    }

    /* Load the object. */
    obj = find_class(self, module_name, class_name);
    if (obj == NULL) {
        Py_DECREF(py_code);
        return -1;
    }

    /* Cache code -> obj. */
    status = PyDict_SetItem(extension_cache, py_code, obj);
    Py_DECREF(py_code);
    if (status < 0) {
        Py_DECREF(obj);
        return -1;
    }

    PDATA_PUSH(self->stack, obj, -1);
    return 0;
}

/* PUT opcode: memoize the object currently on top of the stack under the
 * index given as a newline-terminated decimal text argument.
 *
 * The stack itself is left untouched; the top object is handed to
 * _Unpickler_MemoPut() without taking a new reference here.
 *
 * Returns 0 on success, -1 with an exception set on failure (read error,
 * truncated line, empty stack, unparsable index, negative index, or a
 * failure reported by _Unpickler_MemoPut()).
 */
static int
load_put(UnpicklerObject *self)
{
    PyObject *key, *value;
    Py_ssize_t idx;
    Py_ssize_t len;
    char *s;

    if ((len = _Unpickler_Readline(self, &s)) < 0)
        return -1;
    if (len < 2)
        return bad_readline();
    if (Py_SIZE(self->stack) <= 0)
        return stack_underflow();
    value = self->stack->data[Py_SIZE(self->stack) - 1];

    key = PyLong_FromString(s, NULL, 10);
    if (key == NULL)
        return -1;
    idx = PyLong_AsSsize_t(key);
    Py_DECREF(key);
    if (idx < 0) {
        /* Either the conversion failed (exception already set) or the
           pickle supplied a negative index.  A negative value can never
           name a memo slot, so reject it here instead of passing it on. */
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "negative PUT argument");
        return -1;
    }

    return _Unpickler_MemoPut(self, idx, value);
}

/* BINPUT opcode: memoize the object on top of the stack under the index
 * given by a single unsigned byte read from the input.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_binput(UnpicklerObject *self)
{
    char *arg;
    Py_ssize_t depth;

    /* The argument byte is consumed before the stack is inspected. */
    if (_Unpickler_Read(self, &arg, 1) < 0)
        return -1;

    depth = Py_SIZE(self->stack);
    if (depth <= 0)
        return stack_underflow();

    return _Unpickler_MemoPut(self, (Py_ssize_t)Py_CHARMASK(arg[0]),
                              self->stack->data[depth - 1]);
}

/* LONG_BINPUT opcode: memoize the object on top of the stack under the
 * index given by a 4-byte little-endian unsigned integer read from the
 * input.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_long_binput(UnpicklerObject *self)
{
    PyObject *value;
    size_t raw;
    char *s;

    if (_Unpickler_Read(self, &s, 4) < 0)
        return -1;

    if (Py_SIZE(self->stack) <= 0)
        return stack_underflow();
    value = self->stack->data[Py_SIZE(self->stack) - 1];

    /* Assemble the index with unsigned arithmetic.  Shifting a byte >= 0x80
       left by 24 in a 32-bit signed long overflows into the sign bit, which
       is undefined behaviour and yields a negative index on platforms where
       long is 32 bits wide. */
    raw  = (size_t)Py_CHARMASK(s[0]);
    raw |= (size_t)Py_CHARMASK(s[1]) << 8;
    raw |= (size_t)Py_CHARMASK(s[2]) << 16;
    raw |= (size_t)Py_CHARMASK(s[3]) << 24;
    if (raw > (size_t)PY_SSIZE_T_MAX) {
        /* Not representable as a non-negative Py_ssize_t. */
        PyErr_SetString(PyExc_ValueError, "negative LONG_BINPUT argument");
        return -1;
    }

    return _Unpickler_MemoPut(self, (Py_ssize_t)raw, value);
}

/* Shared implementation of APPEND and APPENDS.
 *
 * x is the stack index of the first item to append; the target container
 * sits just below it at index x - 1.  Every stack entry from x up to the
 * top is appended to the container and removed from the stack, leaving the
 * container on top.
 *
 * Real lists are extended in one slice assignment.  Any other object is
 * extended by calling its "append" attribute once per item.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
do_append(UnpicklerObject *self, int x)
{
    PyObject *list;
    Py_ssize_t len, i;

    len = Py_SIZE(self->stack);
    if (x > len || x <= 0)
        return stack_underflow();
    if (len == x)  /* nothing to do */
        return 0;

    list = self->stack->data[x - 1];

    if (PyList_Check(list)) {
        PyObject *slice;
        Py_ssize_t list_len;
        int status;

        slice = Pdata_poplist(self->stack, x);
        if (!slice)
            return -1;
        list_len = PyList_GET_SIZE(list);
        status = PyList_SetSlice(list, list_len, list_len, slice);
        Py_DECREF(slice);
        return status;
    }
    else {
        PyObject *append_func;

        append_func = PyObject_GetAttrString(list, "append");
        if (append_func == NULL)
            return -1;
        for (i = x; i < len; i++) {
            PyObject *result;

            /* _Unpickler_FastCall() steals the reference to its argument,
               so the stack slot is not released separately. */
            result = _Unpickler_FastCall(self, append_func,
                                         self->stack->data[i]);
            if (result == NULL) {
                /* Release the items that were not handed over yet. */
                Pdata_clear(self->stack, i + 1);
                Py_SIZE(self->stack) = x;
                Py_DECREF(append_func);
                return -1;
            }
            Py_DECREF(result);
        }
        Py_SIZE(self->stack) = x;
        /* PyObject_GetAttrString() returned a new reference. */
        Py_DECREF(append_func);
    }

    return 0;
}

/* APPEND opcode: append the single object on top of the stack to the
 * container directly beneath it.
 */
static int
load_append(UnpicklerObject *self)
{
    Py_ssize_t depth = Py_SIZE(self->stack);

    return do_append(self, depth - 1);
}

/* APPENDS opcode: append every object above the position returned by
 * marker() to the container beneath that position.
 */
static int
load_appends(UnpicklerObject *self)
{
    int first_item = marker(self);

    return do_append(self, first_item);
}

/* Shared implementation of SETITEM and SETITEMS.
 *
 * x is the stack index of the first key; the target mapping sits just
 * below it at index x - 1.  The entries from x to the top of the stack are
 * consumed as alternating key, value pairs and stored into the mapping with
 * PyObject_SetItem().  Pdata_clear() is then called on the stack with x,
 * whether or not every assignment succeeded.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
do_setitems(UnpicklerObject *self, int x)
{
    PyObject *value, *key;
    PyObject *dict;
    Py_ssize_t len, i;
    int status = 0;

    len = Py_SIZE(self->stack);
    if (x > len || x <= 0)
        return stack_underflow();
    if (len == x)  /* nothing to do */
        return 0;
    if ((len - x) % 2 != 0) {
        /* Corrupt or hostile pickle -- we never write one like this. */
        PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
        return -1;
    }

    /* Here, dict does not actually need to be a PyDict; it could be anything
       that supports the __setitem__ attribute. */
    dict = self->stack->data[x - 1];

    for (i = x + 1; i < len; i += 2) {
        key = self->stack->data[i - 1];
        value = self->stack->data[i];
        if (PyObject_SetItem(dict, key, value) < 0) {
            status = -1;
            break;
        }
    }

    Pdata_clear(self->stack, x);
    return status;
}

/* SETITEM opcode: store the key/value pair occupying the top two stack
 * slots into the mapping directly beneath them.
 */
static int
load_setitem(UnpicklerObject *self)
{
    Py_ssize_t depth = Py_SIZE(self->stack);

    return do_setitems(self, depth - 2);
}

/* SETITEMS opcode: store every key/value pair above the position returned
 * by marker() into the mapping beneath that position.
 */
static int
load_setitems(UnpicklerObject *self)
{
    int first_key = marker(self);

    return do_setitems(self, first_key);
}

/* BUILD opcode.
 *
 * Stack is ... instance, state.  The state is popped and applied to the
 * instance, which stays on top of the stack.
 *
 * If the instance has a __setstate__ attribute it is called with the state
 * and is responsible for everything.  Otherwise a default is applied:
 *   - when state is a 2-tuple it is split into (state, slotstate);
 *   - unless state is None it must be a dict, and its items are stored into
 *     instance.__dict__ (keys that are exact str objects are interned);
 *   - slotstate, when present, must be a dict, and its items are set as
 *     attributes on the instance.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_build(UnpicklerObject *self)
{
    PyObject *state, *inst;
    PyObject *slotstate = NULL;
    PyObject *setstate;
    int status = -1;

    if (Py_SIZE(self->stack) < 2)
        return stack_underflow();

    PDATA_POP(self->stack, state);
    if (state == NULL)
        return -1;

    /* Borrowed from the stack; the instance stays where it is. */
    inst = self->stack->data[Py_SIZE(self->stack) - 1];

    setstate = PyObject_GetAttrString(inst, "__setstate__");
    if (setstate != NULL) {
        PyObject *result;

        /* The explicit __setstate__ is responsible for everything.
           state is not released here: _Unpickler_FastCall() steals the
           reference to it. */
        result = _Unpickler_FastCall(self, setstate, state);
        Py_DECREF(setstate);
        if (result == NULL)
            return -1;
        Py_DECREF(result);
        return 0;
    }
    if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
        Py_DECREF(state);
        return -1;
    }
    PyErr_Clear();

    /* A default __setstate__.  First see whether state embeds a
     * slot state dict too (a proto 2 addition).
     */
    if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
        PyObject *tmp = state;

        state = PyTuple_GET_ITEM(tmp, 0);
        slotstate = PyTuple_GET_ITEM(tmp, 1);
        Py_INCREF(state);
        Py_INCREF(slotstate);
        Py_DECREF(tmp);
    }

    /* Set inst.__dict__ from the state dict (if any). */
    if (state != Py_None) {
        PyObject *dict;
        PyObject *d_key, *d_value;
        Py_ssize_t i;

        if (!PyDict_Check(state)) {
            PyErr_SetString(UnpicklingError, "state is not a dictionary");
            goto done;
        }
        dict = PyObject_GetAttrString(inst, "__dict__");
        if (dict == NULL)
            goto done;

        i = 0;
        while (PyDict_Next(state, &i, &d_key, &d_value)) {
            int rc;

            /* normally the keys for instance attributes are
               interned.  we should try to do that here. */
            Py_INCREF(d_key);
            if (PyUnicode_CheckExact(d_key))
                PyUnicode_InternInPlace(&d_key);
            rc = PyObject_SetItem(dict, d_key, d_value);
            Py_DECREF(d_key);
            if (rc < 0) {
                /* dict is a new reference; release it on failure too. */
                Py_DECREF(dict);
                goto done;
            }
        }
        Py_DECREF(dict);
    }

    /* Also set instance attributes from the slotstate dict (if any). */
    if (slotstate != NULL) {
        PyObject *d_key, *d_value;
        Py_ssize_t i;

        if (!PyDict_Check(slotstate)) {
            PyErr_SetString(UnpicklingError,
                            "slot state is not a dictionary");
            goto done;
        }
        i = 0;
        while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
            if (PyObject_SetAttr(inst, d_key, d_value) < 0)
                goto done;
        }
    }

    status = 0;

  done:
    Py_DECREF(state);
    Py_XDECREF(slotstate);
    return status;
}

/* MARK opcode: record the current height of the object stack on the mark
 * stack, growing the mark array first when it is about to fill up.
 *
 * Returns 0 on success, -1 with MemoryError set when the mark array cannot
 * be grown.
 */
static int
load_mark(UnpicklerObject *self)
{

    /* Note that we split the (pickle.py) stack into two stacks, an
     * object stack and a mark stack. Here we push a mark onto the
     * mark stack.
     */

    if ((self->num_marks + 1) >= self->marks_size) {
        size_t alloc;
        int *marks;

        /* Use the size_t type to check for overflow. */
        alloc = ((size_t)self->num_marks << 1) + 20;
        /* Bound the element count so that the byte count computed below
           (alloc * sizeof(int)) cannot overflow either. */
        if (alloc > (size_t)PY_SSIZE_T_MAX / sizeof(int) ||
            alloc <= ((size_t)self->num_marks + 1)) {
            PyErr_NoMemory();
            return -1;
        }

        /* PyMem_Realloc() with a NULL pointer behaves like PyMem_Malloc(),
           so the first allocation needs no special case. */
        marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
        if (marks == NULL) {
            /* self->marks is still valid and is left untouched. */
            PyErr_NoMemory();
            return -1;
        }
        self->marks = marks;
        self->marks_size = (Py_ssize_t)alloc;
    }

    self->marks[self->num_marks++] = Py_SIZE(self->stack);

    return 0;
}

/* REDUCE opcode.
 *
 * Stack is ... callable, argtuple.  Both are popped, the callable is
 * invoked with the argument tuple, and the result is pushed.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_reduce(UnpicklerObject *self)
{
    PyObject *func = NULL;
    PyObject *call_args = NULL;
    PyObject *result;

    PDATA_POP(self->stack, call_args);
    if (call_args == NULL)
        return -1;

    PDATA_POP(self->stack, func);
    if (func == NULL) {
        Py_DECREF(call_args);
        return -1;
    }

    result = PyObject_CallObject(func, call_args);
    Py_DECREF(func);
    Py_DECREF(call_args);
    if (result == NULL)
        return -1;

    PDATA_PUSH(self->stack, result, -1);
    return 0;
}

/* PROTO opcode: read the one-byte protocol number and remember it in
 * self->proto.  Raises ValueError when the number is greater than
 * HIGHEST_PROTOCOL.  PROTO is the first opcode for protocols >= 2.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
load_proto(UnpicklerObject *self)
{
    char *byte;
    int version;

    if (_Unpickler_Read(self, &byte, 1) < 0)
        return -1;

    version = (unsigned char)byte[0];
    if (version > HIGHEST_PROTOCOL) {
        PyErr_Format(PyExc_ValueError,
                     "unsupported pickle protocol: %d", version);
        return -1;
    }

    self->proto = version;
    return 0;
}

/* Main unpickling loop.
 *
 * Resets the mark count, empties the object stack, then reads one opcode
 * byte at a time and dispatches to the matching load_*() helper until STOP
 * is seen or a helper reports failure.  A NUL opcode byte raises EOFError
 * and an unknown opcode raises UnpicklingError.
 *
 * On success the object popped from the top of the stack is returned; on
 * failure NULL is returned with an exception set.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    self->num_marks = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       On failure, "break" leaves the switch and the unconditional break
       after it leaves the loop; on success, "continue" fetches the next
       opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* The loop is left both on STOP and on failure.  Check for a pending
       exception first, so that no further helper is called while an error
       is set. */
    /* XXX: It is not clear what the EOFError re-raise is actually for. */
    err = PyErr_Occurred();
    if (err != NULL) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}

PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");

/* Python-level Unpickler.load(): validate the object, then run load(). */
static PyObject *
Unpickler_load(UnpicklerObject *self)
{
    /* A subclass may override __init__ without chaining up to
       Unpickler.__init__(), which would leave our fields unset and make
       load() crash.  A non-NULL self->read is taken as proof that the
       base initializer ran. */
    if (self->read != NULL)
        return load(self);

    PyErr_Format(UnpicklingError,
                 "Unpickler.__init__() was not called by %s.__init__()",
                 Py_TYPE(self)->tp_name);
    return NULL;
}

/* The name of find_class() is misleading. In newer pickle protocols, this
   function is used for loading any global (i.e., functions), not just
   classes. The name is kept only for backward compatibility. */

PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary.  Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed.  Both arguments passed are str objects.\n");

/* Resolve (module_name, global_name) to an object.
 *
 * args must be a 2-tuple (module_name, global_name).  For protocols below
 * 3 with fix_imports enabled, the pair is first translated through
 * name_mapping_2to3 and the module name through import_mapping_2to3.  The
 * module is then taken from sys.modules, or imported when it is not there,
 * and the named attribute is fetched from it.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
Unpickler_find_class(UnpicklerObject *self, PyObject *args)
{
    PyObject *global;
    PyObject *modules_dict;
    PyObject *module;
    PyObject *module_name, *global_name;

    if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
                           &module_name, &global_name))
        return NULL;

    /* Try to map the old names used in Python 2.x to the new ones used in
       Python 3.x.  We do this only with old pickle protocols and when the
       user has not disabled the feature. */
    if (self->proto < 3 && self->fix_imports) {
        PyObject *key;
        PyObject *item;

        /* Check if the global (i.e., a function or a class) was renamed
           or moved to another module. */
        key = PyTuple_Pack(2, module_name, global_name);
        if (key == NULL)
            return NULL;
        item = PyDict_GetItemWithError(name_mapping_2to3, key);
        Py_DECREF(key);
        if (item) {
            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
                PyErr_Format(PyExc_RuntimeError,
                             "_compat_pickle.NAME_MAPPING values should be "
                             "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
                return NULL;
            }
            module_name = PyTuple_GET_ITEM(item, 0);
            global_name = PyTuple_GET_ITEM(item, 1);
            if (!PyUnicode_Check(module_name) ||
                !PyUnicode_Check(global_name)) {
                PyErr_Format(PyExc_RuntimeError,
                             "_compat_pickle.NAME_MAPPING values should be "
                             "pairs of str, not (%.200s, %.200s)",
                             Py_TYPE(module_name)->tp_name,
                             Py_TYPE(global_name)->tp_name);
                return NULL;
            }
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }

        /* Check if the module was renamed. */
        item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
        if (item) {
            if (!PyUnicode_Check(item)) {
                PyErr_Format(PyExc_RuntimeError,
                             "_compat_pickle.IMPORT_MAPPING values should be "
                             "strings, not %.200s", Py_TYPE(item)->tp_name);
                return NULL;
            }
            module_name = item;
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }
    }

    modules_dict = PySys_GetObject("modules");
    if (modules_dict == NULL) {
        /* PySys_GetObject() does not set an exception when the name is
           missing, so raise one here rather than return NULL bare. */
        PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
        return NULL;
    }

    module = PyDict_GetItemWithError(modules_dict, module_name);
    if (module == NULL) {
        if (PyErr_Occurred())
            return NULL;
        module = PyImport_Import(module_name);
        if (module == NULL)
            return NULL;
    }
    else {
        /* Borrowed from sys.modules; hold a strong reference while the
           attribute lookup runs, since that may execute arbitrary code. */
        Py_INCREF(module);
    }

    global = PyObject_GetAttr(module, global_name);
    Py_DECREF(module);
    return global;
}

/* Method table for Unpickler objects: load() takes no arguments
   (METH_NOARGS) and find_class() takes positional arguments
   (METH_VARARGS). */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};

/* Destructor for Unpickler objects: untrack from the cycle collector,
 * drop every owned reference, release the input buffer view, free the
 * memo, the mark array, the line buffer and the encoding strings, and
 * finally hand the object memory back through tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    PyTypeObject *type = Py_TYPE(self);

    PyObject_GC_UnTrack((PyObject *)self);

    /* Owned object references. */
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);

    /* Buffer view over the input, if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Raw memory owned by the unpickler. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    /* NOTE(review): encoding/errors are released with free(), so they are
       presumably allocated with the C library allocator -- confirm
       against _Unpickler_SetInputEncoding(). */
    free(self->encoding);
    free(self->errors);

    type->tp_free((PyObject *)self);
}

/* tp_traverse slot: report every object reference held directly in the
 * Unpickler structure to the cycle collector.  Py_VISIT returns early when
 * the visit callback returns non-zero.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    /* Input callables. */
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);

    /* Unpickling state. */
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);

    return 0;
}

/* tp_clear slot, also used by Unpickler_init() to reset the object before
 * re-initialization.  Drops every owned reference and frees every owned
 * buffer, leaving each field NULL so the function can safely run again.
 * Always returns 0.
 */
static int
Unpickler_clear(UnpicklerObject *self)
{
    /* Owned object references. */
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);

    /* Buffer view over the input, if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Raw memory owned by the unpickler; each pointer is reset right
       after it is freed. */
    _Unpickler_MemoCleanup(self);

    PyMem_Free(self->marks);
    self->marks = NULL;

    PyMem_Free(self->input_line);
    self->input_line = NULL;

    free(self->encoding);
    self->encoding = NULL;

    free(self->errors);
    self->errors = NULL;

    return 0;
}

/* Class docstring for Unpickler.  The signature line lists the keyword
   arguments accepted by Unpickler_init() (see its kwlist). */
PyDoc_STRVAR(Unpickler_doc,
"Unpickler(file, *, fix_imports=True, encoding='ASCII', errors='strict')"
"\n"
"This takes a binary file for reading a pickle data stream.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no\n"
"proto argument is needed.\n"
"\n"
"The file-like object must have two methods, a read() method\n"
"that takes an integer argument, and a readline() method that\n"
"requires no arguments.  Both methods should return bytes.\n"
"Thus the file-like object can be a binary file object opened for\n"
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
"which are used to control compatibility support for pickle stream\n"
"generated by Python 2.x.  If *fix_imports* is True, pickle will try to\n"
"map the old Python 2.x names to the new names used in Python 3.x.  The\n"
"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
"instances pickled by Python 2.x; these default to 'ASCII' and\n"
"'strict', respectively.\n");
5506 5507 5508 5509

/* tp_init slot for Unpickler.
 *
 * Accepts exactly one positional argument (the input file) plus the
 * optional keywords fix_imports, encoding and errors.  Any state left by a
 * previous __init__() call is cleared first.  The function then sets up
 * the input stream, the input encoding, the fix_imports flag, the optional
 * persistent_load hook, a fresh object stack and a fresh memo.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
    PyObject *file;
    PyObject *fix_imports = Py_True;
    char *encoding = NULL;
    char *errors = NULL;
    Py_ssize_t npositional = Py_SIZE(args);

    /* XXX: That is an horrible error message. But, I don't know how to do
       better... */
    if (npositional != 1) {
        PyErr_Format(PyExc_TypeError,
                     "%s takes exactly one positional argument (%zd given)",
                     Py_TYPE(self)->tp_name, npositional);
        return -1;
    }

    /* Argument parsing lives in __init__() rather than __new__() so that
       subclasses are free to define an __init__() with a different
       signature.  The price is that every other Unpickler method must
       cope with a subclass that never called Unpickler.__init__(), which
       leaves our internal invariants unestablished. */
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
                                     &file, &fix_imports, &encoding, &errors))
        return -1;

    /* In case of multiple __init__() calls, clear previous content. */
    if (self->read != NULL)
        (void)Unpickler_clear(self);

    if (_Unpickler_SetInputStream(self, file) < 0 ||
        _Unpickler_SetInputEncoding(self, encoding, errors) < 0)
        return -1;

    self->fix_imports = PyObject_IsTrue(fix_imports);
    if (self->fix_imports == -1)
        return -1;

    /* Remember the persistent_load hook when the instance provides one. */
    if (!PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
        self->pers_func = NULL;
    }
    else {
        self->pers_func = PyObject_GetAttrString((PyObject *)self,
                                                 "persistent_load");
        if (self->pers_func == NULL)
            return -1;
    }

    self->stack = (Pdata *)Pdata_New();
    if (self->stack == NULL)
        return -1;

    self->memo_size = 32;
    self->memo = _Unpickler_NewMemo(self->memo_size);
    if (self->memo == NULL)
        return -1;

    self->arg = NULL;
    self->proto = 0;

    return 0;
}

5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712
/* Define a proxy object for the Unpickler's internal memo object. This is to
 * avoid breaking code like:
 *  unpickler.memo.clear()
 * and
 *  unpickler.memo = saved_memo
 * Is this a good idea? Not really, but we don't want to break code that uses
 * it. Note that we don't implement the entire mapping API here. This is
 * intentional, as these should be treated as black-box implementation details.
 *
 * We do, however, have to implement pickling/unpickling support because of
 * real-world code like cvs2svn.
 */

typedef struct {
    PyObject_HEAD
    /* Strong reference to the Unpickler whose memo this proxy exposes;
       taken in UnpicklerMemoProxy_New() and dropped by the proxy's
       dealloc and clear functions. */
    UnpicklerObject *unpickler;
} UnpicklerMemoProxyObject;

PyDoc_STRVAR(ump_clear_doc,
"memo.clear() -> None.  Remove all items from memo.");

/* memo.clear(): discard the unpickler's current memo and replace it with a
 * freshly allocated one of the same memo_size.  Returns None, or NULL when
 * the new memo cannot be allocated.
 */
static PyObject *
ump_clear(UnpicklerMemoProxyObject *self)
{
    UnpicklerObject *owner = self->unpickler;

    _Unpickler_MemoCleanup(owner);
    owner->memo = _Unpickler_NewMemo(owner->memo_size);
    if (owner->memo == NULL)
        return NULL;
    Py_RETURN_NONE;
}

PyDoc_STRVAR(ump_copy_doc,
"memo.copy() -> new_memo.  Copy the memo to a new object.");

/* memo.copy(): build a new dict mapping each occupied memo index (as an
 * int) to the object stored there.  Empty (NULL) slots are skipped.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
ump_copy(UnpicklerMemoProxyObject *self)
{
    UnpicklerObject *owner = self->unpickler;
    PyObject *snapshot;
    Py_ssize_t slot;

    snapshot = PyDict_New();
    if (snapshot == NULL)
        return NULL;

    for (slot = 0; slot < owner->memo_size; slot++) {
        PyObject *entry = owner->memo[slot];
        PyObject *index;
        int rc;

        if (entry == NULL)
            continue;

        index = PyLong_FromSsize_t(slot);
        if (index == NULL) {
            Py_DECREF(snapshot);
            return NULL;
        }
        rc = PyDict_SetItem(snapshot, index, entry);
        Py_DECREF(index);
        if (rc < 0) {
            Py_DECREF(snapshot);
            return NULL;
        }
    }

    return snapshot;
}

PyDoc_STRVAR(ump_reduce_doc,
"memo.__reduce__(). Pickling support.");

/* memo.__reduce__(): return (dict, (contents,)) where contents is a dict
 * copy of the memo, so the proxy pickles as a plain dict.  args is unused.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
{
    PyObject *contents;
    PyObject *ctor_args;
    PyObject *reduction;

    contents = ump_copy(self);
    if (contents == NULL)
        return NULL;

    /* PyTuple_Pack() takes its own references, so ours are dropped as soon
       as each tuple has been built. */
    ctor_args = PyTuple_Pack(1, contents);
    Py_DECREF(contents);
    if (ctor_args == NULL)
        return NULL;

    reduction = PyTuple_Pack(2, (PyObject *)&PyDict_Type, ctor_args);
    Py_DECREF(ctor_args);
    return reduction;
}

/* Method table for the memo proxy: clear() and copy() take no arguments,
   __reduce__() is registered as METH_VARARGS. */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear",       (PyCFunction)ump_clear,  METH_NOARGS,  ump_clear_doc},
    {"copy",        (PyCFunction)ump_copy,   METH_NOARGS,  ump_copy_doc},
    {"__reduce__",  (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL}    /* sentinel */
};

/* Destructor for the memo proxy: untrack it from the cycle collector,
 * release the reference to the proxied Unpickler (if still held) and free
 * the object.
 */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    UnpicklerObject *owner;

    PyObject_GC_UnTrack(self);
    owner = self->unpickler;
    Py_XDECREF(owner);
    PyObject_GC_Del((PyObject *)self);
}

/* tp_traverse slot: report the proxied Unpickler to the cycle collector. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}

/* tp_clear slot: drop the reference to the proxied Unpickler and reset the
   field to NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}

/* Type object for the memo proxy.  Instances are unhashable, take part in
   cyclic garbage collection, use the generic attribute protocol and expose
   the methods listed in unpicklerproxy_methods.  Slots after tp_methods
   are left zero-initialized. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};

/* Create a memo proxy bound to the given Unpickler.  The proxy takes its
 * own reference to the unpickler and is tracked by the cycle collector.
 * Returns a new reference, or NULL when allocation fails.
 */
static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
{
    UnpicklerMemoProxyObject *proxy;

    proxy = PyObject_GC_New(UnpicklerMemoProxyObject,
                            &UnpicklerMemoProxyType);
    if (proxy != NULL) {
        Py_INCREF(unpickler);
        proxy->unpickler = unpickler;
        PyObject_GC_Track(proxy);
    }
    return (PyObject *)proxy;
}

/*****************************************************************************/


5748 5749 5750
/* Getter for the Unpickler 'memo' attribute.
 *
 * Does not expose the internal memo array directly; instead it returns a
 * freshly created proxy object bound to `self` (a new reference), or NULL
 * if the proxy could not be created.
 */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self)
{
    return UnpicklerMemoProxy_New(self);
}

static int
5755
Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
5756
{
5757 5758 5759
    PyObject **new_memo;
    Py_ssize_t new_memo_size = 0;
    Py_ssize_t i;
5760

5761
    if (obj == NULL) {
5762 5763 5764 5765
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }
5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807

    if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
        UnpicklerObject *unpickler =
            ((UnpicklerMemoProxyObject *)obj)->unpickler;

        new_memo_size = unpickler->memo_size;
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        for (i = 0; i < new_memo_size; i++) {
            Py_XINCREF(unpickler->memo[i]);
            new_memo[i] = unpickler->memo[i];
        }
    }
    else if (PyDict_Check(obj)) {
        Py_ssize_t i = 0;
        PyObject *key, *value;

        new_memo_size = PyDict_Size(obj);
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        while (PyDict_Next(obj, &i, &key, &value)) {
            Py_ssize_t idx;
            if (!PyLong_Check(key)) {
                PyErr_SetString(PyExc_TypeError,
                                "memo key must be integers");
                goto error;
            }
            idx = PyLong_AsSsize_t(key);
            if (idx == -1 && PyErr_Occurred())
                goto error;
            if (_Unpickler_MemoPut(self, idx, value) < 0)
                goto error;
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "'memo' attribute must be an UnpicklerMemoProxy object"
                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5808 5809 5810
        return -1;
    }

5811 5812 5813
    _Unpickler_MemoCleanup(self);
    self->memo_size = new_memo_size;
    self->memo = new_memo;
5814 5815

    return 0;
5816 5817 5818 5819 5820 5821 5822 5823 5824 5825

  error:
    if (new_memo_size) {
        i = new_memo_size;
        while (--i >= 0) {
            Py_XDECREF(new_memo[i]);
        }
        PyMem_FREE(new_memo);
    }
    return -1;
5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877
}

/* Getter for the Unpickler 'persistent_load' attribute.
 *
 * Returns a new reference to the stored callable.  When none has been set,
 * raises AttributeError("persistent_load") and returns NULL.
 */
static PyObject *
Unpickler_get_persload(UnpicklerObject *self)
{
    PyObject *func = self->pers_func;

    if (func == NULL) {
        PyErr_SetString(PyExc_AttributeError, "persistent_load");
        return NULL;
    }
    Py_INCREF(func);
    return func;
}

/* Setter for the Unpickler 'persistent_load' attribute.
 *
 * Only callables are accepted and the attribute cannot be deleted; both
 * violations raise TypeError.  Returns 0 on success, -1 on error.
 */
static int
Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
{
    const char *complaint = NULL;
    PyObject *previous;

    if (value == NULL)
        complaint = "attribute deletion is not supported";
    else if (!PyCallable_Check(value))
        complaint = "persistent_load must be a callable taking "
                    "one argument";

    if (complaint != NULL) {
        PyErr_SetString(PyExc_TypeError, complaint);
        return -1;
    }

    /* Install the new callable before releasing the old one; the old
       value may be NULL, hence the X variant of DECREF. */
    previous = self->pers_func;
    Py_INCREF(value);
    self->pers_func = value;
    Py_XDECREF(previous);

    return 0;
}

/* Computed attributes of Unpickler objects: name, getter, setter, then the
   (unused) doc string and closure slots. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo",
     (getter)Unpickler_get_memo,
     (setter)Unpickler_set_memo,
     NULL, NULL},
    {"persistent_load",
     (getter)Unpickler_get_persload,
     (setter)Unpickler_set_persload,
     NULL, NULL},
    {NULL}  /* sentinel */
};

/* Type object for _pickle.Unpickler.
 *
 * The type can be subclassed and takes part in cyclic garbage collection
 * (see the flags slot); attributes 'memo' and 'persistent_load' are
 * provided through Unpickler_getsets.
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                        /*tp_flags*/
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};

/* Module-level convenience functions: dump(), dumps(), load(), loads(). */
PyDoc_STRVAR(pickle_dump_doc,
"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
"\n"
"Write a pickled representation of obj to the open file object file.  This\n"
"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
"efficient.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3.  The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported.  The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"The file argument must have a write() method that accepts a single bytes\n"
"argument.  It can thus be a file object opened for binary writing, a\n"
"io.BytesIO instance, or any other custom object that meets this interface.\n"
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");

/* Implementation of the module-level dump() function.
 *
 * Creates a temporary pickler, configures it from the arguments, pickles
 * `obj` and flushes the output to `file`.  Returns None on success and
 * NULL with an exception set on failure.
 */
static PyObject *
pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
    PyObject *obj;
    PyObject *file;
    PyObject *proto = NULL;
    PyObject *fix_imports = Py_True;
    PyObject *result = NULL;
    PicklerObject *pickler;

    /* Reject a fourth positional argument up front: fix_imports may only
       be passed by keyword. */
    if (Py_SIZE(args) > 3) {
        PyErr_Format(PyExc_TypeError,
                     "pickle.dump() takes at most 3 positional "
                     "argument (%zd given)", Py_SIZE(args));
        return NULL;
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
                                     &obj, &file, &proto, &fix_imports))
        return NULL;

    pickler = _Pickler_New();
    if (pickler == NULL)
        return NULL;

    /* Each step runs only if the previous one succeeded. */
    if (_Pickler_SetProtocol(pickler, proto, fix_imports) >= 0 &&
        _Pickler_SetOutputStream(pickler, file) >= 0 &&
        dump(pickler, obj) >= 0 &&
        _Pickler_FlushToFile(pickler) >= 0) {
        Py_INCREF(Py_None);
        result = Py_None;
    }

    Py_DECREF(pickler);
    return result;
}

PyDoc_STRVAR(pickle_dumps_doc,
"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
"\n"
"Return the pickled representation of the object as a bytes\n"
"object, instead of writing it to a file.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3.  The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported.  The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");

/* Implementation of the module-level dumps() function.
 *
 * Pickles `obj` with a temporary pickler and returns whatever
 * _Pickler_GetString() produces for it, or NULL with an exception set
 * on failure.
 */
static PyObject *
pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
    PyObject *obj;
    PyObject *proto = NULL;
    PyObject *fix_imports = Py_True;
    PyObject *result = NULL;
    PicklerObject *pickler;

    /* Reject a third positional argument up front: fix_imports may only
       be passed by keyword. */
    if (Py_SIZE(args) > 2) {
        PyErr_Format(PyExc_TypeError,
                     "pickle.dumps() takes at most 2 positional "
                     "argument (%zd given)", Py_SIZE(args));
        return NULL;
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
                                     &obj, &proto, &fix_imports))
        return NULL;

    pickler = _Pickler_New();
    if (pickler == NULL)
        return NULL;

    /* Only fetch the output once configuration and pickling succeeded. */
    if (_Pickler_SetProtocol(pickler, proto, fix_imports) >= 0 &&
        dump(pickler, obj) >= 0) {
        result = _Pickler_GetString(pickler);
    }

    Py_DECREF(pickler);
    return result;
}

PyDoc_STRVAR(pickle_load_doc,
"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
"\n"
"Read a pickled object representation from the open file object file and\n"
"return the reconstituted object hierarchy specified therein.  This is\n"
"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no protocol\n"
"argument is needed.  Bytes past the pickled object's representation are\n"
"ignored.\n"
"\n"
"The argument file must have two methods, a read() method that takes an\n"
"integer argument, and a readline() method that requires no arguments.  Both\n"
"methods should return bytes.  Thus *file* can be a binary file object opened\n"
"for reading, a BytesIO object, or any other custom object that meets this\n"
"interface.\n"
"\n"
"Optional keyword arguments are fix_imports, encoding and errors,\n"
"which are used to control compatibility support for pickle stream generated\n"
"by Python 2.x.  If fix_imports is True, pickle will try to map the old\n"
"Python 2.x names to the new names used in Python 3.x.  The encoding and\n"
"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
"2.x; these default to 'ASCII' and 'strict', respectively.\n");

/* Implementation of the module-level load() function.
 *
 * Creates a temporary unpickler reading from `file`, applies the
 * fix_imports/encoding/errors options and returns the result of load()
 * (NULL with an exception set on failure).
 *
 * `file` may be given positionally or by keyword; the remaining arguments
 * are keyword-only.
 */
static PyObject *
pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
    PyObject *file;
    PyObject *fix_imports = Py_True;
    PyObject *result;
    char *encoding = NULL;
    char *errors = NULL;
    UnpicklerObject *unpickler;

    /* fix_imports, encoding and errors are keyword-only arguments.  Only
       an excess of positional arguments is rejected here, so that `file`
       can still be supplied by keyword; a missing `file` is reported by
       the argument parser below. */
    if (Py_SIZE(args) > 1) {
        PyErr_Format(PyExc_TypeError,
                     "pickle.load() takes at most 1 positional "
                     "argument (%zd given)", Py_SIZE(args));
        return NULL;
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
                                     &file, &fix_imports, &encoding, &errors))
        return NULL;

    unpickler = _Unpickler_New();
    if (unpickler == NULL)
        return NULL;

    if (_Unpickler_SetInputStream(unpickler, file) < 0)
        goto error;

    if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
        goto error;

    /* PyObject_IsTrue() returns -1 when evaluating truthiness failed. */
    unpickler->fix_imports = PyObject_IsTrue(fix_imports);
    if (unpickler->fix_imports == -1)
        goto error;

    result = load(unpickler);
    Py_DECREF(unpickler);
    return result;

  error:
    Py_DECREF(unpickler);
    return NULL;
}

PyDoc_STRVAR(pickle_loads_doc,
"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
"\n"
"Read a pickled object hierarchy from a bytes object and return the\n"
"reconstituted object hierarchy specified therein.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no protocol\n"
"argument is needed.  Bytes past the pickled object's representation are\n"
"ignored.\n"
"\n"
"Optional keyword arguments are fix_imports, encoding and errors, which\n"
"are used to control compatibility support for pickle stream generated\n"
"by Python 2.x.  If fix_imports is True, pickle will try to map the old\n"
"Python 2.x names to the new names used in Python 3.x.  The encoding and\n"
"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
"2.x; these default to 'ASCII' and 'strict', respectively.\n");

/* Implementation of the module-level loads() function.
 *
 * Creates a temporary unpickler reading from the object `input`, applies
 * the fix_imports/encoding/errors options and returns the result of load()
 * (NULL with an exception set on failure).
 *
 * `input` may be given positionally or by keyword; the remaining arguments
 * are keyword-only.
 */
static PyObject *
pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
    PyObject *input;
    PyObject *fix_imports = Py_True;
    PyObject *result;
    char *encoding = NULL;
    char *errors = NULL;
    UnpicklerObject *unpickler;

    /* fix_imports, encoding and errors are keyword-only arguments.  Only
       an excess of positional arguments is rejected here, so that `input`
       can still be supplied by keyword; a missing `input` is reported by
       the argument parser below. */
    if (Py_SIZE(args) > 1) {
        PyErr_Format(PyExc_TypeError,
                     "pickle.loads() takes at most 1 positional "
                     "argument (%zd given)", Py_SIZE(args));
        return NULL;
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
                                     &input, &fix_imports, &encoding, &errors))
        return NULL;

    unpickler = _Unpickler_New();
    if (unpickler == NULL)
        return NULL;

    if (_Unpickler_SetStringInput(unpickler, input) < 0)
        goto error;

    if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
        goto error;

    /* PyObject_IsTrue() returns -1 when evaluating truthiness failed. */
    unpickler->fix_imports = PyObject_IsTrue(fix_imports);
    if (unpickler->fix_imports == -1)
        goto error;

    result = load(unpickler);
    Py_DECREF(unpickler);
    return result;

  error:
    Py_DECREF(unpickler);
    return NULL;
}


/* Functions exported at module level.  All of them accept both positional
   and keyword arguments. */
static struct PyMethodDef pickle_methods[] = {
    {"dump", (PyCFunction)pickle_dump,
     METH_VARARGS | METH_KEYWORDS, pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps,
     METH_VARARGS | METH_KEYWORDS, pickle_dumps_doc},
    {"load", (PyCFunction)pickle_load,
     METH_VARARGS | METH_KEYWORDS, pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads,
     METH_VARARGS | METH_KEYWORDS, pickle_loads_doc},
    {NULL, NULL, 0, NULL}  /* sentinel */
};

/* Module initialization. */
static int
6188
initmodule(void)
6189
{
6190 6191 6192 6193 6194 6195
    PyObject *copyreg = NULL;
    PyObject *compat_pickle = NULL;

    /* XXX: We should ensure that the types of the dictionaries imported are
       exactly PyDict objects. Otherwise, it is possible to crash the pickle
       since we use the PyDict API directly to access these dictionaries. */
6196 6197 6198

    copyreg = PyImport_ImportModule("copyreg");
    if (!copyreg)
6199
        goto error;
6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212
    dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
    if (!dispatch_table)
        goto error;
    extension_registry = \
        PyObject_GetAttrString(copyreg, "_extension_registry");
    if (!extension_registry)
        goto error;
    inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
    if (!inverted_registry)
        goto error;
    extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
    if (!extension_cache)
        goto error;
6213
    Py_CLEAR(copyreg);
6214

6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259
    /* Load the 2.x -> 3.x stdlib module mapping tables */
    compat_pickle = PyImport_ImportModule("_compat_pickle");
    if (!compat_pickle)
        goto error;
    name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
    if (!name_mapping_2to3)
        goto error;
    if (!PyDict_CheckExact(name_mapping_2to3)) {
        PyErr_Format(PyExc_RuntimeError,
                     "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
                     Py_TYPE(name_mapping_2to3)->tp_name);
        goto error;
    }
    import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
                                                 "IMPORT_MAPPING");
    if (!import_mapping_2to3)
        goto error;
    if (!PyDict_CheckExact(import_mapping_2to3)) {
        PyErr_Format(PyExc_RuntimeError,
                     "_compat_pickle.IMPORT_MAPPING should be a dict, "
                     "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
        goto error;
    }
    /* ... and the 3.x -> 2.x mapping tables */
    name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
                                               "REVERSE_NAME_MAPPING");
    if (!name_mapping_3to2)
        goto error;
    if (!PyDict_CheckExact(name_mapping_3to2)) {
        PyErr_Format(PyExc_RuntimeError,
                     "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
                     "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
        goto error;
    }
    import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
                                                 "REVERSE_IMPORT_MAPPING");
    if (!import_mapping_3to2)
        goto error;
    if (!PyDict_CheckExact(import_mapping_3to2)) {
        PyErr_Format(PyExc_RuntimeError,
                     "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
                     "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
        goto error;
    }
    Py_CLEAR(compat_pickle);
6260 6261 6262

    empty_tuple = PyTuple_New(0);
    if (empty_tuple == NULL)
6263
        goto error;
6264 6265
    two_tuple = PyTuple_New(2);
    if (two_tuple == NULL)
6266
        goto error;
6267 6268 6269 6270 6271 6272 6273 6274 6275
    /* We use this temp container with no regard to refcounts, or to
     * keeping containees alive.  Exempt from GC, because we don't
     * want anything looking at two_tuple() by magic.
     */
    PyObject_GC_UnTrack(two_tuple);

    return 0;

  error:
6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287
    Py_CLEAR(copyreg);
    Py_CLEAR(dispatch_table);
    Py_CLEAR(extension_registry);
    Py_CLEAR(inverted_registry);
    Py_CLEAR(extension_cache);
    Py_CLEAR(compat_pickle);
    Py_CLEAR(name_mapping_2to3);
    Py_CLEAR(import_mapping_2to3);
    Py_CLEAR(name_mapping_3to2);
    Py_CLEAR(import_mapping_3to2);
    Py_CLEAR(empty_tuple);
    Py_CLEAR(two_tuple);
6288 6289 6290 6291 6292 6293 6294 6295
    return -1;
}

static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",
    pickle_module_doc,
    -1,
6296
    pickle_methods,
6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313
    NULL,
    NULL,
    NULL,
    NULL
};

PyMODINIT_FUNC
PyInit__pickle(void)
{
    PyObject *m;

    if (PyType_Ready(&Unpickler_Type) < 0)
        return NULL;
    if (PyType_Ready(&Pickler_Type) < 0)
        return NULL;
    if (PyType_Ready(&Pdata_Type) < 0)
        return NULL;
6314 6315 6316 6317
    if (PyType_Ready(&PicklerMemoProxyType) < 0)
        return NULL;
    if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
        return NULL;
6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348

    /* Create the module and add the functions. */
    m = PyModule_Create(&_picklemodule);
    if (m == NULL)
        return NULL;

    if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
        return NULL;
    if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
        return NULL;

    /* Initialize the exceptions. */
    PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
    if (PickleError == NULL)
        return NULL;
    PicklingError = \
        PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
    if (PicklingError == NULL)
        return NULL;
    UnpicklingError = \
        PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
    if (UnpicklingError == NULL)
        return NULL;

    if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
        return NULL;
    if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
        return NULL;
    if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
        return NULL;

6349
    if (initmodule() < 0)
6350 6351 6352 6353
        return NULL;

    return m;
}