Commit bcd5cbe0 authored by Antoine Pitrou's avatar Antoine Pitrou

Issue #4751: hashlib now releases the GIL when hashing large buffers

(with a hardwired threshold of 2048 bytes), allowing better parallelization
on multi-CPU systems. Contributed by Lukas Lueg (ebfe) and Victor Stinner.
parent 5bad41ee
...@@ -35,6 +35,11 @@ to the buffer interface (normally :class:`bytes` objects) using the ...@@ -35,6 +35,11 @@ to the buffer interface (normally :class:`bytes` objects) using the
concatenation of the data fed to it so far using the :meth:`digest` or concatenation of the data fed to it so far using the :meth:`digest` or
:meth:`hexdigest` methods. :meth:`hexdigest` methods.
.. note::
For better multithreading performance, the Python GIL is released for
data larger than 2047 bytes at object creation or on update.
.. note:: .. note::
Feeding string objects to :meth:`update` is not supported, as hashes work
......
...@@ -198,6 +198,19 @@ class HashLibTestCase(unittest.TestCase): ...@@ -198,6 +198,19 @@ class HashLibTestCase(unittest.TestCase):
"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
"de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
def test_gil(self):
# Check things work fine with an input larger than the size required
# for multithreaded operation (which is hardwired to 2048).
gil_minsize = 2048
m = hashlib.md5()
m.update(b'1')
m.update(b'#' * gil_minsize)
m.update(b'1')
self.assertEquals(m.hexdigest(), 'cb1e1a2cbc80be75e19935d621fb9b21')
m = hashlib.md5(b'x' * gil_minsize)
self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958')
def test_main(): def test_main():
support.run_unittest(HashLibTestCase) support.run_unittest(HashLibTestCase)
......
...@@ -256,6 +256,10 @@ C-API ...@@ -256,6 +256,10 @@ C-API
Extension Modules Extension Modules
----------------- -----------------
- Issue #4751: hashlib now releases the GIL when hashing large buffers
(with a hardwired threshold of 2048 bytes), allowing better parallelization
on multi-CPU systems. Contributed by Lukas Lueg (ebfe) and Victor Stinner.
- Issue #4051: Prevent conflict of UNICODE macros in cPickle. - Issue #4051: Prevent conflict of UNICODE macros in cPickle.
- Issue #4738: Each zlib object now has a separate lock, allowing to compress - Issue #4738: Each zlib object now has a separate lock, allowing to compress
......
...@@ -26,15 +26,35 @@ ...@@ -26,15 +26,35 @@
#define HASH_OBJ_CONSTRUCTOR 0 #define HASH_OBJ_CONSTRUCTOR 0
#endif #endif
/* Inputs of this many bytes or more release the GIL while hashing. */
#define HASHLIB_GIL_MINSIZE 2048
#ifdef WITH_THREAD
#include "pythread.h"
/* Acquire obj->lock if the object has one.  A non-blocking acquire is
   tried first; only if the lock is contended do we release the GIL and
   block, so the common uncontended case never drops the GIL. */
#define ENTER_HASHLIB(obj) \
if ((obj)->lock) { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
Py_BEGIN_ALLOW_THREADS \
PyThread_acquire_lock((obj)->lock, 1); \
Py_END_ALLOW_THREADS \
} \
}
/* Release obj->lock if the object has one. */
#define LEAVE_HASHLIB(obj) \
if ((obj)->lock) { \
PyThread_release_lock((obj)->lock); \
}
#else
/* Without thread support the lock macros are no-ops. */
#define ENTER_HASHLIB(obj)
#define LEAVE_HASHLIB(obj)
#endif
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
PyObject *name; /* name of this hash algorithm */ PyObject *name; /* name of this hash algorithm */
EVP_MD_CTX ctx; /* OpenSSL message digest context */ EVP_MD_CTX ctx; /* OpenSSL message digest context */
/* #ifdef WITH_THREAD
* TODO investigate performance impact of including a lock for this object PyThread_type_lock lock; /* OpenSSL context lock */
* here and releasing the Python GIL while hash updates are in progress. #endif
* (perhaps only release GIL if input length will take long to process?)
*/
} EVPobject; } EVPobject;
...@@ -63,19 +83,42 @@ newEVPobject(PyObject *name) ...@@ -63,19 +83,42 @@ newEVPobject(PyObject *name)
if (retval != NULL) { if (retval != NULL) {
Py_INCREF(name); Py_INCREF(name);
retval->name = name; retval->name = name;
#ifdef WITH_THREAD
retval->lock = NULL;
#endif
} }
return retval; return retval;
} }
/* Feed len bytes at vp into self's digest context.

   The data is handed to OpenSSL in chunks of at most MUNCH_SIZE bytes,
   because the update length must fit in an unsigned int while len is a
   Py_ssize_t that may be larger. */
static void
EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
{
    const unsigned char *data = (const unsigned char *)vp;

    while (len > 0) {
        unsigned int chunk;

        if (len > (Py_ssize_t)MUNCH_SIZE)
            chunk = MUNCH_SIZE;
        else
            chunk = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int);
        EVP_DigestUpdate(&self->ctx, (const void *)data, chunk);
        data += chunk;
        len -= chunk;
    }
}
/* Internal methods for a hash object */ /* Internal methods for a hash object */
static void static void
EVP_dealloc(PyObject *ptr) EVP_dealloc(EVPobject *self)
{ {
EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx); #ifdef WITH_THREAD
Py_XDECREF(((EVPobject *)ptr)->name); if (self->lock != NULL)
PyObject_Del(ptr); PyThread_free_lock(self->lock);
#endif
EVP_MD_CTX_cleanup(&self->ctx);
Py_XDECREF(self->name);
PyObject_Del(self);
} }
...@@ -91,7 +134,9 @@ EVP_copy(EVPobject *self, PyObject *unused) ...@@ -91,7 +134,9 @@ EVP_copy(EVPobject *self, PyObject *unused)
if ( (newobj = newEVPobject(self->name))==NULL) if ( (newobj = newEVPobject(self->name))==NULL)
return NULL; return NULL;
ENTER_HASHLIB(self);
EVP_MD_CTX_copy(&newobj->ctx, &self->ctx); EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
LEAVE_HASHLIB(self);
return (PyObject *)newobj; return (PyObject *)newobj;
} }
...@@ -106,7 +151,9 @@ EVP_digest(EVPobject *self, PyObject *unused) ...@@ -106,7 +151,9 @@ EVP_digest(EVPobject *self, PyObject *unused)
PyObject *retval; PyObject *retval;
unsigned int digest_size; unsigned int digest_size;
ENTER_HASHLIB(self);
EVP_MD_CTX_copy(&temp_ctx, &self->ctx); EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
LEAVE_HASHLIB(self);
digest_size = EVP_MD_CTX_size(&temp_ctx); digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL); EVP_DigestFinal(&temp_ctx, digest, NULL);
...@@ -128,7 +175,9 @@ EVP_hexdigest(EVPobject *self, PyObject *unused) ...@@ -128,7 +175,9 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
unsigned int i, j, digest_size; unsigned int i, j, digest_size;
/* Get the raw (binary) digest value */ /* Get the raw (binary) digest value */
ENTER_HASHLIB(self);
EVP_MD_CTX_copy(&temp_ctx, &self->ctx); EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
LEAVE_HASHLIB(self);
digest_size = EVP_MD_CTX_size(&temp_ctx); digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL); EVP_DigestFinal(&temp_ctx, digest, NULL);
...@@ -155,7 +204,12 @@ EVP_hexdigest(EVPobject *self, PyObject *unused) ...@@ -155,7 +204,12 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
} }
#define MY_GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) do { \ #define MY_GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) do { \
if (PyUnicode_Check(obj) || !PyObject_CheckBuffer((obj))) { \ if (PyUnicode_Check((obj))) { \
PyErr_SetString(PyExc_TypeError, \
"Unicode-objects must be encoded before hashing");\
return NULL; \
} \
if (!PyObject_CheckBuffer((obj))) { \
PyErr_SetString(PyExc_TypeError, \ PyErr_SetString(PyExc_TypeError, \
"object supporting the buffer API required"); \ "object supporting the buffer API required"); \
return NULL; \ return NULL; \
...@@ -184,21 +238,32 @@ EVP_update(EVPobject *self, PyObject *args) ...@@ -184,21 +238,32 @@ EVP_update(EVPobject *self, PyObject *args)
return NULL; return NULL;
MY_GET_BUFFER_VIEW_OR_ERROUT(obj, &view); MY_GET_BUFFER_VIEW_OR_ERROUT(obj, &view);
if (view.len > 0 && view.len <= MUNCH_SIZE) {
EVP_DigestUpdate(&self->ctx, view.buf, view.len); #ifdef WITH_THREAD
} else { if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
Py_ssize_t offset = 0, len = view.len; self->lock = PyThread_allocate_lock();
while (len) { if (self->lock == NULL) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; PyBuffer_Release(&view);
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf + offset, process); PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
len -= process; return NULL;
offset += process;
} }
} }
PyBuffer_Release(&view);
Py_INCREF(Py_None); if (self->lock != NULL) {
return Py_None; Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
EVP_hash(self, view.buf, view.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
EVP_hash(self, view.buf, view.len);
}
#else
EVP_hash(self, view.buf, view.len);
#endif
PyBuffer_Release(&view);
Py_RETURN_NONE;
} }
static PyMethodDef EVP_methods[] = { static PyMethodDef EVP_methods[] = {
...@@ -212,13 +277,21 @@ static PyMethodDef EVP_methods[] = { ...@@ -212,13 +277,21 @@ static PyMethodDef EVP_methods[] = {
static PyObject * static PyObject *
EVP_get_block_size(EVPobject *self, void *closure) EVP_get_block_size(EVPobject *self, void *closure)
{ {
return PyLong_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx)); long block_size;
ENTER_HASHLIB(self);
block_size = EVP_MD_CTX_block_size(&self->ctx);
LEAVE_HASHLIB(self);
return PyLong_FromLong(block_size);
} }
static PyObject * static PyObject *
EVP_get_digest_size(EVPobject *self, void *closure) EVP_get_digest_size(EVPobject *self, void *closure)
{ {
return PyLong_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx)); long size;
ENTER_HASHLIB(self);
size = EVP_MD_CTX_size(&self->ctx);
LEAVE_HASHLIB(self);
return PyLong_FromLong(size);
} }
static PyMemberDef EVP_members[] = { static PyMemberDef EVP_members[] = {
...@@ -246,11 +319,11 @@ static PyGetSetDef EVP_getseters[] = { ...@@ -246,11 +319,11 @@ static PyGetSetDef EVP_getseters[] = {
static PyObject * static PyObject *
EVP_repr(PyObject *self) EVP_repr(EVPobject *self)
{ {
char buf[100]; char buf[100];
PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>", PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>",
_PyUnicode_AsString(((EVPobject *)self)->name), self); _PyUnicode_AsString(self->name), self);
return PyUnicode_FromString(buf); return PyUnicode_FromString(buf);
} }
...@@ -293,17 +366,12 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) ...@@ -293,17 +366,12 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
Py_INCREF(self->name); Py_INCREF(self->name);
if (data_obj) { if (data_obj) {
if (len > 0 && len <= MUNCH_SIZE) { if (view.len >= HASHLIB_GIL_MINSIZE) {
EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t, Py_BEGIN_ALLOW_THREADS
unsigned int)); EVP_hash(self, view.buf, view.len);
Py_END_ALLOW_THREADS
} else { } else {
Py_ssize_t offset = 0, len = view.len; EVP_hash(self, view.buf, view.len);
while (len) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf + offset, process);
len -= process;
offset += process;
}
} }
PyBuffer_Release(&view); PyBuffer_Release(&view);
} }
...@@ -335,12 +403,12 @@ static PyTypeObject EVPtype = { ...@@ -335,12 +403,12 @@ static PyTypeObject EVPtype = {
sizeof(EVPobject), /*tp_basicsize*/ sizeof(EVPobject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
/* methods */ /* methods */
EVP_dealloc, /*tp_dealloc*/ (destructor)EVP_dealloc, /*tp_dealloc*/
0, /*tp_print*/ 0, /*tp_print*/
0, /*tp_getattr*/ 0, /*tp_getattr*/
0, /*tp_setattr*/ 0, /*tp_setattr*/
0, /*tp_compare*/ 0, /*tp_compare*/
EVP_repr, /*tp_repr*/ (reprfunc)EVP_repr, /*tp_repr*/
0, /*tp_as_number*/ 0, /*tp_as_number*/
0, /*tp_as_sequence*/ 0, /*tp_as_sequence*/
0, /*tp_as_mapping*/ 0, /*tp_as_mapping*/
...@@ -395,17 +463,12 @@ EVPnew(PyObject *name_obj, ...@@ -395,17 +463,12 @@ EVPnew(PyObject *name_obj,
} }
if (cp && len) { if (cp && len) {
if (len > 0 && len <= MUNCH_SIZE) { if (len >= HASHLIB_GIL_MINSIZE) {
EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t, Py_BEGIN_ALLOW_THREADS
unsigned int)); EVP_hash(self, cp, len);
Py_END_ALLOW_THREADS
} else { } else {
Py_ssize_t offset = 0; EVP_hash(self, cp, len);
while (len) {
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
EVP_DigestUpdate(&self->ctx, cp + offset, process);
len -= process;
offset += process;
}
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment