Commit 5103ab4c authored by Victor Stinner's avatar Victor Stinner

hashtable.h now supports keys of any size

Issue #26588: hashtable.h now supports keys of any size, not only
sizeof(void*). It allows to support key larger than sizeof(void*), but also to
use less memory for key smaller than sizeof(void*).
parent 68068073
......@@ -196,23 +196,38 @@ set_reentrant(int reentrant)
}
#endif
static Py_uhash_t
hashtable_hash_pyobject(size_t key_size, const void *pkey)
{
PyObject *obj;
_Py_HASHTABLE_READ_KEY(key_size, pkey, obj);
return PyObject_Hash(obj);
}
static int
hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry)
hashtable_compare_unicode(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *entry)
{
if (key != NULL && entry->key != NULL)
return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
PyObject *key, *entry_key;
_Py_HASHTABLE_READ_KEY(key_size, pkey, key);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key);
if (key != NULL && entry_key != NULL)
return (PyUnicode_Compare(key, entry_key) == 0);
else
return key == entry->key;
return key == entry_key;
}
static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
static _Py_hashtable_t *
hashtable_new(size_t data_size,
hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
return _Py_hashtable_new_full(data_size, 0,
return _Py_hashtable_new_full(key_size, data_size, 0,
hash_func, compare_func,
NULL, NULL, NULL, &hashtable_alloc);
}
......@@ -230,20 +245,25 @@ raw_free(void *ptr)
}
static Py_uhash_t
hashtable_hash_traceback(const void *key)
hashtable_hash_traceback(size_t key_size, const void *pkey)
{
const traceback_t *traceback = key;
const traceback_t *traceback;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback);
return traceback->hash;
}
static int
hashtable_compare_traceback(const traceback_t *traceback1,
hashtable_compare_traceback(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *he)
{
const traceback_t *traceback2 = he->key;
traceback_t *traceback1, *traceback2;
const frame_t *frame1, *frame2;
int i;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2);
if (traceback1->nframe != traceback2->nframe)
return 0;
......@@ -312,15 +332,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame)
}
/* intern the filename */
entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename);
entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename);
if (entry != NULL) {
filename = (PyObject *)entry->key;
_Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry,
filename);
}
else {
/* tracemalloc_filenames is responsible to keep a reference
to the filename */
Py_INCREF(filename);
if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) {
if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) {
Py_DECREF(filename);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the filename");
......@@ -403,9 +424,10 @@ traceback_new(void)
traceback->hash = traceback_hash(traceback);
/* intern the traceback */
entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback);
entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback);
if (entry != NULL) {
traceback = (traceback_t *)entry->key;
_Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry,
traceback);
}
else {
traceback_t *copy;
......@@ -422,7 +444,7 @@ traceback_new(void)
}
memcpy(copy, traceback, traceback_size);
if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) {
if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) {
raw_free(copy);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the traceback: putdata failed");
......@@ -464,7 +486,7 @@ tracemalloc_remove_trace(void *ptr)
{
trace_t trace;
if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) {
if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) {
assert(tracemalloc_traced_memory >= trace.size);
tracemalloc_traced_memory -= trace.size;
}
......@@ -714,17 +736,23 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size)
#endif /* TRACE_RAW_MALLOC */
static int
tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{
PyObject *filename = (PyObject *)entry->key;
PyObject *filename;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename);
Py_DECREF(filename);
return 0;
}
static int
traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data)
traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{
traceback_t *traceback = (traceback_t *)entry->key;
traceback_t *traceback;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback);
raw_free(traceback);
return 0;
}
......@@ -791,21 +819,20 @@ tracemalloc_init(void)
}
#endif
tracemalloc_filenames = hashtable_new(0,
(_Py_hashtable_hash_func)PyObject_Hash,
tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0,
hashtable_hash_pyobject,
hashtable_compare_unicode);
tracemalloc_tracebacks = hashtable_new(0,
(_Py_hashtable_hash_func)hashtable_hash_traceback,
(_Py_hashtable_compare_func)hashtable_compare_traceback);
tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0,
hashtable_hash_traceback,
hashtable_compare_traceback);
tracemalloc_traces = hashtable_new(sizeof(trace_t),
tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
|| tracemalloc_traces == NULL)
{
|| tracemalloc_traces == NULL) {
PyErr_NoMemory();
return -1;
}
......@@ -840,9 +867,9 @@ tracemalloc_deinit(void)
tracemalloc_stop();
/* destroy hash tables */
_Py_hashtable_destroy(tracemalloc_traces);
_Py_hashtable_destroy(tracemalloc_tracebacks);
_Py_hashtable_destroy(tracemalloc_filenames);
_Py_hashtable_destroy(tracemalloc_traces);
#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
if (tables_lock != NULL) {
......@@ -935,8 +962,9 @@ tracemalloc_stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
/* release memory */
tracemalloc_clear_traces();
/* release memory */
raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL;
}
......@@ -1065,14 +1093,15 @@ typedef struct {
} get_traces_t;
static int
tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry,
void *user_data)
{
get_traces_t *get_traces = user_data;
trace_t *trace;
PyObject *tracemalloc_obj;
int res;
trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry);
trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry);
tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
if (tracemalloc_obj == NULL)
......@@ -1087,9 +1116,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
}
static int
tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks,
_Py_hashtable_entry_t *entry,
void *user_data)
{
PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry);
Py_DECREF(obj);
return 0;
}
......@@ -1120,7 +1151,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj)
/* the traceback hash table is used temporarily to intern traceback tuple
of (filename, lineno) tuples */
get_traces.tracebacks = hashtable_new(sizeof(PyObject *),
get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (get_traces.tracebacks == NULL) {
......@@ -1152,7 +1183,7 @@ error:
finally:
if (get_traces.tracebacks != NULL) {
_Py_hashtable_foreach(get_traces.tracebacks,
tracemalloc_pyobject_decref_cb, NULL);
tracemalloc_pyobject_decref_cb, NULL);
_Py_hashtable_destroy(get_traces.tracebacks);
}
if (get_traces.traces != NULL)
......
This diff is collapsed.
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H
/* The whole API is private */
#ifndef Py_LIMITED_API
/* Single linked list */
typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next;
} _Py_slist_item_t;
......@@ -16,30 +17,55 @@ typedef struct {
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
/* _Py_hashtable: table entry */
typedef struct {
/* used by _Py_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item;
const void *key;
Py_uhash_t key_hash;
/* data follows */
/* key (key_size bytes) and then data (data_size bytes) follows */
} _Py_hashtable_entry_t;
#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \
((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))
#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \
((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)))
#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \
((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size)
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY))
/* Get a key value from pkey: use memcpy() rather than a pointer dereference
to avoid memory alignment issues. */
#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \
do { \
assert(sizeof(DST_KEY) == (KEY_SIZE)); \
memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \
} while (0)
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY))
#define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \
do { \
assert(sizeof(KEY) == (KEY_SIZE)); \
memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \
} while (0)
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \
} while (0)
typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
/* _Py_hashtable: prototypes */
typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size,
const void *pkey);
typedef int (*_Py_hashtable_compare_func) (size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *he);
typedef void* (*_Py_hashtable_copy_data_func)(void *data);
typedef void (*_Py_hashtable_free_data_func)(void *data);
typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
......@@ -52,10 +78,14 @@ typedef struct {
void (*free) (void *ptr);
} _Py_hashtable_allocator_t;
/* _Py_hashtable: table */
typedef struct {
size_t num_buckets;
size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets;
size_t key_size;
size_t data_size;
_Py_hashtable_hash_func hash_func;
......@@ -66,16 +96,25 @@ typedef struct {
_Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;
/* hash and compare functions for integers and pointers */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);
/* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
size_t key_size,
const void *pkey);
/* comparison using memcmp() */
PyAPI_FUNC(int) _Py_hashtable_compare_direct(
size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *entry);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
size_t key_size,
size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
size_t key_size,
size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
......@@ -84,45 +123,95 @@ PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
_Py_hashtable_free_data_func free_data_func,
_Py_hashtable_get_data_size_func get_data_size_func,
_Py_hashtable_allocator_t *allocator);
PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
/* Return a copy of the hash table */
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht,
_Py_hashtable_entry_t *entry,
void *arg);
/* Call func() on each entry of the hashtable.
Iteration stops if func() result is non-zero, in this case it's the result
of the call. Otherwise, the function returns 0. */
PyAPI_FUNC(int) _Py_hashtable_foreach(
_Py_hashtable_t *ht,
_Py_hashtable_foreach_func func, void *arg);
_Py_hashtable_foreach_func func,
void *arg);
PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
const void *key);
/* Add a new entry to the hash. The key must not be present in the hash table.
Return 0 on success, -1 on memory error.
Don't call directly this function,
but use _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros */
PyAPI_FUNC(int) _Py_hashtable_set(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
_Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA))
#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \
_Py_hashtable_set(TABLE, sizeof(KEY), &KEY, 0, NULL)
/* Get an entry.
Return NULL if the key does not exist.
Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
macro */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
/* Get data from an entry. Copy entry data into data and return 1 if the entry
exists, return 0 if the entry does not exist.
Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
/* Don't call directly this function, but use _Py_HASHTABLE_POP() macro */
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
const void *key);
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
_Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \
_Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
#endif /* Py_LIMITED_API */
/* Delete an entry.
WARNING: The entry must exist. */
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#endif /* Py_LIMITED_API */
#endif
......@@ -263,10 +263,10 @@ w_ref(PyObject *v, char *flag, WFILE *p)
if (Py_REFCNT(v) == 1)
return 0;
entry = _Py_hashtable_get_entry(p->hashtable, v);
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
if (entry != NULL) {
/* write the reference index to the stream */
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w);
/* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff);
w_byte(TYPE_REF, p);
......@@ -571,7 +571,8 @@ static int
w_init_refs(WFILE *wf, int version)
{
if (version >= 3) {
wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (wf->hashtable == NULL) {
PyErr_NoMemory();
......@@ -582,9 +583,13 @@ w_init_refs(WFILE *wf, int version)
}
static int
w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *Py_UNUSED(data))
{
Py_XDECREF(entry->key);
PyObject *entry_key;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key);
Py_XDECREF(entry_key);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment