Commit 121b9487 authored by Christian Heimes's avatar Christian Heimes

Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.

parent 5d75f441
......@@ -15,5 +15,6 @@ Here's an overview:
.. toctree::
hashlib.rst
hashlib-blake2.rst
hmac.rst
secrets.rst
This diff was suppressed by a .gitattributes entry.
This diff is collapsed.
......@@ -65,11 +65,15 @@ concatenation of the data fed to it so far using the :meth:`digest` or
Constructors for hash algorithms that are always present in this module are
:func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`,
and :func:`sha512`. :func:`md5` is normally available as well, though it
:func:`sha512`, :func:`blake2b`, and :func:`blake2s`.
:func:`md5` is normally available as well, though it
may be missing if you are using a rare "FIPS compliant" build of Python.
Additional algorithms may also be available depending upon the OpenSSL
library that Python uses on your platform.
.. versionadded:: 3.6
:func:`blake2b` and :func:`blake2s` were added.
For example, to obtain the digest of the byte string ``b'Nobody inspects the
spammish repetition'``::
......@@ -243,6 +247,12 @@ include a `salt <https://en.wikipedia.org/wiki/Salt_%28cryptography%29>`_.
.. versionadded:: 3.6
BLAKE2
------
BLAKE2 takes additional arguments, see :ref:`hashlib-blake2`.
.. seealso::
Module :mod:`hmac`
......@@ -251,6 +261,8 @@ include a `salt <https://en.wikipedia.org/wiki/Salt_%28cryptography%29>`_.
Module :mod:`base64`
Another way to encode binary hashes for non-binary environments.
See :ref:`hashlib-blake2`.
http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
The FIPS 180-2 publication on Secure Hash Algorithms.
......
......@@ -4,14 +4,14 @@
__doc__ = """hashlib module - A common interface to many hash functions.
new(name, data=b'') - returns a new hash object implementing the
given hash function; initializing the hash
using the given binary data.
new(name, data=b'', **kwargs) - returns a new hash object implementing the
given hash function; initializing the hash
using the given binary data.
Named constructor functions are also available, these are faster
than using new(name):
md5(), sha1(), sha224(), sha256(), sha384(), and sha512()
md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), and blake2s()
More algorithms may be available on your platform but the above are guaranteed
to exist. See the algorithms_guaranteed and algorithms_available attributes
......@@ -54,7 +54,8 @@ More condensed:
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
'blake2b', 'blake2s')
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
......@@ -85,6 +86,10 @@ def __get_builtin_constructor(name):
import _sha512
cache['SHA384'] = cache['sha384'] = _sha512.sha384
cache['SHA512'] = cache['sha512'] = _sha512.sha512
elif name in ('blake2b', 'blake2s'):
import _blake2
cache['blake2b'] = _blake2.blake2b
cache['blake2s'] = _blake2.blake2s
except ImportError:
pass # no extension module, this hash is unsupported.
......@@ -107,17 +112,23 @@ def __get_openssl_constructor(name):
return __get_builtin_constructor(name)
def __py_new(name, data=b''):
"""new(name, data=b'') - Return a new hashing object using the named algorithm;
optionally initialized with data (which must be bytes).
def __py_new(name, data=b'', **kwargs):
"""new(name, data=b'', **kwargs) - Return a new hashing object using the
named algorithm; optionally initialized with data (which must be bytes).
"""
return __get_builtin_constructor(name)(data)
return __get_builtin_constructor(name)(data, **kwargs)
def __hash_new(name, data=b''):
def __hash_new(name, data=b'', **kwargs):
"""new(name, data=b'') - Return a new hashing object using the named algorithm;
optionally initialized with data (which must be bytes).
"""
if name in {'blake2b', 'blake2s'}:
# Prefer our blake2 implementation.
# OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
# It does neither support keyed blake2 nor advanced features like
# salt, personal, tree hashing or SSE.
return __get_builtin_constructor(name)(data, **kwargs)
try:
return _hashlib.new(name, data)
except ValueError:
......@@ -218,6 +229,7 @@ for __func_name in __always_supported:
import logging
logging.exception('code for hash %s was not found.', __func_name)
# Cleanup locals()
del __always_supported, __func_name, __get_hash
del __py_new, __hash_new, __get_openssl_constructor
This diff is collapsed.
......@@ -541,7 +541,7 @@ coverage-report:
# Run "Argument Clinic" over all source files
# (depends on python having already been built)
.PHONY=clinic
clinic: $(BUILDPYTHON)
clinic: $(BUILDPYTHON) Modules/_blake2/blake2s_impl.c
$(RUNSHARED) $(PYTHON_FOR_BUILD) ./Tools/clinic/clinic.py --make
# Build the interpreter
......@@ -571,6 +571,11 @@ pybuilddir.txt: $(BUILDPYTHON)
Modules/_math.o: Modules/_math.c Modules/_math.h
$(CC) -c $(CCSHARED) $(PY_CORE_CFLAGS) -o $@ $<
# blake2s is auto-generated from blake2b
Modules/_blake2/blake2s_impl.c: $(BUILDPYTHON) Modules/_blake2/blake2b_impl.c Modules/_blake2/blake2b2s.py
$(RUNSHARED) $(PYTHON_FOR_BUILD) Modules/_blake2/blake2b2s.py
$(RUNSHARED) $(PYTHON_FOR_BUILD) Tools/clinic/clinic.py -f $@
# Build the shared modules
# Under GNU make, MAKEFLAGS are sorted and normalized; the 's' for
# -s, --silent or --quiet is always the first char.
......@@ -584,6 +589,7 @@ sharedmods: $(BUILDPYTHON) pybuilddir.txt Modules/_math.o
_TCLTK_INCLUDES='$(TCLTK_INCLUDES)' _TCLTK_LIBS='$(TCLTK_LIBS)' \
$(PYTHON_FOR_BUILD) $(srcdir)/setup.py $$quiet build
# Build static library
# avoid long command lines, same as LIBRARY_OBJS
$(LIBRARY): $(LIBRARY_OBJS)
......
......@@ -89,6 +89,8 @@ Core and Builtins
Library
-------
- Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.
- Issue #25596: Optimized glob() and iglob() functions in the
glob module; they are now about 3--6 times faster.
......
#!/usr/bin/python3
import os
import re
HERE = os.path.dirname(os.path.abspath(__file__))
BLAKE2 = os.path.join(HERE, 'impl')
PUBLIC_SEARCH = re.compile(r'\ int (blake2[bs]p?[a-z_]*)\(')
def getfiles():
for name in os.listdir(BLAKE2):
name = os.path.join(BLAKE2, name)
if os.path.isfile(name):
yield name
def find_public():
public_funcs = set()
for name in getfiles():
with open(name) as f:
for line in f:
# find public functions
mo = PUBLIC_SEARCH.search(line)
if mo:
public_funcs.add(mo.group(1))
for f in sorted(public_funcs):
print('#define {0:<18} PyBlake2_{0}'.format(f))
return public_funcs
def main():
lines = []
with open(os.path.join(HERE, 'blake2b_impl.c')) as f:
for line in f:
line = line.replace('blake2b', 'blake2s')
line = line.replace('BLAKE2b', 'BLAKE2s')
line = line.replace('BLAKE2B', 'BLAKE2S')
lines.append(line)
with open(os.path.join(HERE, 'blake2s_impl.c'), 'w') as f:
f.write(''.join(lines))
# find_public()
if __name__ == '__main__':
main()
This diff is collapsed.
/*
* Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com>
* Modified for CPython by Christian Heimes <christian@python.org>
*
* To the extent possible under law, the author have dedicated all
* copyright and related and neighboring rights to this software to
* the public domain worldwide. This software is distributed without
* any warranty. http://creativecommons.org/publicdomain/zero/1.0/
*/
#include "Python.h"
#include "impl/blake2.h"
extern PyTypeObject PyBlake2_BLAKE2bType;
extern PyTypeObject PyBlake2_BLAKE2sType;
PyDoc_STRVAR(blake2mod__doc__,
"_blake2b provides BLAKE2b for hashlib\n"
);
static struct PyMethodDef blake2mod_functions[] = {
{NULL, NULL}
};
static struct PyModuleDef blake2_module = {
PyModuleDef_HEAD_INIT,
"_blake2",
blake2mod__doc__,
-1,
blake2mod_functions,
NULL,
NULL,
NULL,
NULL
};
#define ADD_INT(d, name, value) do { \
PyObject *x = PyLong_FromLong(value); \
if (!x) { \
Py_DECREF(m); \
return NULL; \
} \
if (PyDict_SetItemString(d, name, x) < 0) { \
Py_DECREF(m); \
return NULL; \
} \
Py_DECREF(x); \
} while(0)
PyMODINIT_FUNC
PyInit__blake2(void)
{
PyObject *m;
PyObject *d;
m = PyModule_Create(&blake2_module);
if (m == NULL)
return NULL;
/* BLAKE2b */
Py_TYPE(&PyBlake2_BLAKE2bType) = &PyType_Type;
if (PyType_Ready(&PyBlake2_BLAKE2bType) < 0) {
return NULL;
}
Py_INCREF(&PyBlake2_BLAKE2bType);
PyModule_AddObject(m, "blake2b", (PyObject *)&PyBlake2_BLAKE2bType);
d = PyBlake2_BLAKE2bType.tp_dict;
ADD_INT(d, "SALT_SIZE", BLAKE2B_SALTBYTES);
ADD_INT(d, "PERSON_SIZE", BLAKE2B_PERSONALBYTES);
ADD_INT(d, "MAX_KEY_SIZE", BLAKE2B_KEYBYTES);
ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES);
PyModule_AddIntConstant(m, "BLAKE2B_SALT_SIZE", BLAKE2B_SALTBYTES);
PyModule_AddIntConstant(m, "BLAKE2B_PERSON_SIZE", BLAKE2B_PERSONALBYTES);
PyModule_AddIntConstant(m, "BLAKE2B_MAX_KEY_SIZE", BLAKE2B_KEYBYTES);
PyModule_AddIntConstant(m, "BLAKE2B_MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES);
/* BLAKE2s */
Py_TYPE(&PyBlake2_BLAKE2sType) = &PyType_Type;
if (PyType_Ready(&PyBlake2_BLAKE2sType) < 0) {
return NULL;
}
Py_INCREF(&PyBlake2_BLAKE2sType);
PyModule_AddObject(m, "blake2s", (PyObject *)&PyBlake2_BLAKE2sType);
d = PyBlake2_BLAKE2sType.tp_dict;
ADD_INT(d, "SALT_SIZE", BLAKE2S_SALTBYTES);
ADD_INT(d, "PERSON_SIZE", BLAKE2S_PERSONALBYTES);
ADD_INT(d, "MAX_KEY_SIZE", BLAKE2S_KEYBYTES);
ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES);
PyModule_AddIntConstant(m, "BLAKE2S_SALT_SIZE", BLAKE2S_SALTBYTES);
PyModule_AddIntConstant(m, "BLAKE2S_PERSON_SIZE", BLAKE2S_PERSONALBYTES);
PyModule_AddIntConstant(m, "BLAKE2S_MAX_KEY_SIZE", BLAKE2S_KEYBYTES);
PyModule_AddIntConstant(m, "BLAKE2S_MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES);
return m;
}
/* Prefix all public blake2 symbols with PyBlake2_
*/
#ifndef Py_BLAKE2_NS
#define Py_BLAKE2_NS
#define blake2b PyBlake2_blake2b
#define blake2b_compress PyBlake2_blake2b_compress
#define blake2b_final PyBlake2_blake2b_final
#define blake2b_init PyBlake2_blake2b_init
#define blake2b_init_key PyBlake2_blake2b_init_key
#define blake2b_init_param PyBlake2_blake2b_init_param
#define blake2b_update PyBlake2_blake2b_update
#define blake2bp PyBlake2_blake2bp
#define blake2bp_final PyBlake2_blake2bp_final
#define blake2bp_init PyBlake2_blake2bp_init
#define blake2bp_init_key PyBlake2_blake2bp_init_key
#define blake2bp_update PyBlake2_blake2bp_update
#define blake2s PyBlake2_blake2s
#define blake2s_compress PyBlake2_blake2s_compress
#define blake2s_final PyBlake2_blake2s_final
#define blake2s_init PyBlake2_blake2s_init
#define blake2s_init_key PyBlake2_blake2s_init_key
#define blake2s_init_param PyBlake2_blake2s_init_param
#define blake2s_update PyBlake2_blake2s_update
#define blake2sp PyBlake2_blake2sp
#define blake2sp_final PyBlake2_blake2sp_final
#define blake2sp_init PyBlake2_blake2sp_init
#define blake2sp_init_key PyBlake2_blake2sp_init_key
#define blake2sp_update PyBlake2_blake2sp_update
#endif /* Py_BLAKE2_NS */
This diff is collapsed.
/*[clinic input]
preserve
[clinic start generated code]*/
PyDoc_STRVAR(py_blake2b_new__doc__,
"blake2b(string=None, *, digest_size=_blake2b.blake2b.MAX_DIGEST_SIZE,\n"
" key=None, salt=None, person=None, fanout=1, depth=1,\n"
" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n"
" last_node=False)\n"
"--\n"
"\n"
"Return a new BLAKE2b hash object.");
static PyObject *
py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size,
Py_buffer *key, Py_buffer *salt, Py_buffer *person,
int fanout, int depth, PyObject *leaf_size_obj,
PyObject *node_offset_obj, int node_depth,
int inner_size, int last_node);
static PyObject *
py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL};
static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2b", _keywords, 0};
PyObject *data = NULL;
int digest_size = BLAKE2B_OUTBYTES;
Py_buffer key = {NULL, NULL};
Py_buffer salt = {NULL, NULL};
Py_buffer person = {NULL, NULL};
int fanout = 1;
int depth = 1;
PyObject *leaf_size_obj = NULL;
PyObject *node_offset_obj = NULL;
int node_depth = 0;
int inner_size = 0;
int last_node = 0;
if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser,
&data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) {
goto exit;
}
return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node);
exit:
/* Cleanup for key */
if (key.obj) {
PyBuffer_Release(&key);
}
/* Cleanup for salt */
if (salt.obj) {
PyBuffer_Release(&salt);
}
/* Cleanup for person */
if (person.obj) {
PyBuffer_Release(&person);
}
return return_value;
}
PyDoc_STRVAR(_blake2b_blake2b_copy__doc__,
"copy($self, /)\n"
"--\n"
"\n"
"Return a copy of the hash object.");
#define _BLAKE2B_BLAKE2B_COPY_METHODDEF \
{"copy", (PyCFunction)_blake2b_blake2b_copy, METH_NOARGS, _blake2b_blake2b_copy__doc__},
static PyObject *
_blake2b_blake2b_copy_impl(BLAKE2bObject *self);
static PyObject *
_blake2b_blake2b_copy(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2b_blake2b_copy_impl(self);
}
PyDoc_STRVAR(_blake2b_blake2b_update__doc__,
"update($self, obj, /)\n"
"--\n"
"\n"
"Update this hash object\'s state with the provided string.");
#define _BLAKE2B_BLAKE2B_UPDATE_METHODDEF \
{"update", (PyCFunction)_blake2b_blake2b_update, METH_O, _blake2b_blake2b_update__doc__},
PyDoc_STRVAR(_blake2b_blake2b_digest__doc__,
"digest($self, /)\n"
"--\n"
"\n"
"Return the digest value as a string of binary data.");
#define _BLAKE2B_BLAKE2B_DIGEST_METHODDEF \
{"digest", (PyCFunction)_blake2b_blake2b_digest, METH_NOARGS, _blake2b_blake2b_digest__doc__},
static PyObject *
_blake2b_blake2b_digest_impl(BLAKE2bObject *self);
static PyObject *
_blake2b_blake2b_digest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2b_blake2b_digest_impl(self);
}
PyDoc_STRVAR(_blake2b_blake2b_hexdigest__doc__,
"hexdigest($self, /)\n"
"--\n"
"\n"
"Return the digest value as a string of hexadecimal digits.");
#define _BLAKE2B_BLAKE2B_HEXDIGEST_METHODDEF \
{"hexdigest", (PyCFunction)_blake2b_blake2b_hexdigest, METH_NOARGS, _blake2b_blake2b_hexdigest__doc__},
static PyObject *
_blake2b_blake2b_hexdigest_impl(BLAKE2bObject *self);
static PyObject *
_blake2b_blake2b_hexdigest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2b_blake2b_hexdigest_impl(self);
}
/*[clinic end generated code: output=535a54852c98e51c input=a9049054013a1b77]*/
/*[clinic input]
preserve
[clinic start generated code]*/
PyDoc_STRVAR(py_blake2s_new__doc__,
"blake2s(string=None, *, digest_size=_blake2s.blake2s.MAX_DIGEST_SIZE,\n"
" key=None, salt=None, person=None, fanout=1, depth=1,\n"
" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n"
" last_node=False)\n"
"--\n"
"\n"
"Return a new BLAKE2s hash object.");
static PyObject *
py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size,
Py_buffer *key, Py_buffer *salt, Py_buffer *person,
int fanout, int depth, PyObject *leaf_size_obj,
PyObject *node_offset_obj, int node_depth,
int inner_size, int last_node);
static PyObject *
py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL};
static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2s", _keywords, 0};
PyObject *data = NULL;
int digest_size = BLAKE2S_OUTBYTES;
Py_buffer key = {NULL, NULL};
Py_buffer salt = {NULL, NULL};
Py_buffer person = {NULL, NULL};
int fanout = 1;
int depth = 1;
PyObject *leaf_size_obj = NULL;
PyObject *node_offset_obj = NULL;
int node_depth = 0;
int inner_size = 0;
int last_node = 0;
if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser,
&data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) {
goto exit;
}
return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node);
exit:
/* Cleanup for key */
if (key.obj) {
PyBuffer_Release(&key);
}
/* Cleanup for salt */
if (salt.obj) {
PyBuffer_Release(&salt);
}
/* Cleanup for person */
if (person.obj) {
PyBuffer_Release(&person);
}
return return_value;
}
PyDoc_STRVAR(_blake2s_blake2s_copy__doc__,
"copy($self, /)\n"
"--\n"
"\n"
"Return a copy of the hash object.");
#define _BLAKE2S_BLAKE2S_COPY_METHODDEF \
{"copy", (PyCFunction)_blake2s_blake2s_copy, METH_NOARGS, _blake2s_blake2s_copy__doc__},
static PyObject *
_blake2s_blake2s_copy_impl(BLAKE2sObject *self);
static PyObject *
_blake2s_blake2s_copy(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2s_blake2s_copy_impl(self);
}
PyDoc_STRVAR(_blake2s_blake2s_update__doc__,
"update($self, obj, /)\n"
"--\n"
"\n"
"Update this hash object\'s state with the provided string.");
#define _BLAKE2S_BLAKE2S_UPDATE_METHODDEF \
{"update", (PyCFunction)_blake2s_blake2s_update, METH_O, _blake2s_blake2s_update__doc__},
PyDoc_STRVAR(_blake2s_blake2s_digest__doc__,
"digest($self, /)\n"
"--\n"
"\n"
"Return the digest value as a string of binary data.");
#define _BLAKE2S_BLAKE2S_DIGEST_METHODDEF \
{"digest", (PyCFunction)_blake2s_blake2s_digest, METH_NOARGS, _blake2s_blake2s_digest__doc__},
static PyObject *
_blake2s_blake2s_digest_impl(BLAKE2sObject *self);
static PyObject *
_blake2s_blake2s_digest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2s_blake2s_digest_impl(self);
}
PyDoc_STRVAR(_blake2s_blake2s_hexdigest__doc__,
"hexdigest($self, /)\n"
"--\n"
"\n"
"Return the digest value as a string of hexadecimal digits.");
#define _BLAKE2S_BLAKE2S_HEXDIGEST_METHODDEF \
{"hexdigest", (PyCFunction)_blake2s_blake2s_hexdigest, METH_NOARGS, _blake2s_blake2s_hexdigest__doc__},
static PyObject *
_blake2s_blake2s_hexdigest_impl(BLAKE2sObject *self);
static PyObject *
_blake2s_blake2s_hexdigest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored))
{
return _blake2s_blake2s_hexdigest_impl(self);
}
/*[clinic end generated code: output=535ea7903f9ccf76 input=a9049054013a1b77]*/
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_CONFIG_H__
#define __BLAKE2_CONFIG_H__
/* These don't work everywhere */
#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__)
#define HAVE_SSE2
#endif
#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif
#if defined(__SSE4_1__)
#define HAVE_SSE41
#endif
#if defined(__AVX__)
#define HAVE_AVX
#endif
#if defined(__XOP__)
#define HAVE_XOP
#endif
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_AVX
#ifndef HAVE_SSE41
#define HAVE_SSE41
#endif
#endif
#ifdef HAVE_SSE41
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif
#ifdef HAVE_SSSE3
#define HAVE_SSE2
#endif
#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
#endif
#endif
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__
#include <stdint.h>
#include <string.h>
BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
uint32_t w = *p++;
w |= ( uint32_t )( *p++ ) << 8;
w |= ( uint32_t )( *p++ ) << 16;
w |= ( uint32_t )( *p++ ) << 24;
return w;
#endif
}
BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
w |= ( uint64_t )( *p++ ) << 48;
w |= ( uint64_t )( *p++ ) << 56;
return w;
#endif
}
BLAKE2_LOCAL_INLINE(void) store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
BLAKE2_LOCAL_INLINE(void) store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
BLAKE2_LOCAL_INLINE(uint64_t) load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
return w;
}
BLAKE2_LOCAL_INLINE(void) store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
}
BLAKE2_LOCAL_INLINE(uint32_t) rotl32( const uint32_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 32 - c ) );
}
BLAKE2_LOCAL_INLINE(uint64_t) rotl64( const uint64_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 64 - c ) );
}
BLAKE2_LOCAL_INLINE(uint32_t) rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
BLAKE2_LOCAL_INLINE(uint64_t) rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
BLAKE2_LOCAL_INLINE(void) secure_zero_memory(void *v, size_t n)
{
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
}
#endif
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#include <stddef.h>
#include <stdint.h>
#ifdef BLAKE2_NO_INLINE
#define BLAKE2_LOCAL_INLINE(type) static type
#endif
#ifndef BLAKE2_LOCAL_INLINE
#define BLAKE2_LOCAL_INLINE(type) static inline type
#endif
#if defined(__cplusplus)
extern "C" {
#endif
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct __blake2s_state
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[2 * BLAKE2S_BLOCKBYTES];
size_t buflen;
uint8_t last_node;
} blake2s_state;
typedef struct __blake2b_state
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[2 * BLAKE2B_BLOCKBYTES];
size_t buflen;
uint8_t last_node;
} blake2b_state;
typedef struct __blake2sp_state
{
blake2s_state S[8][1];
blake2s_state R[1];
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen;
} blake2sp_state;
typedef struct __blake2bp_state
{
blake2b_state S[4][1];
blake2b_state R[1];
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen;
} blake2bp_state;
#pragma pack(push, 1)
typedef struct __blake2s_param
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint8_t node_offset[6];// 14
uint8_t node_depth; /* 15 */
uint8_t inner_length; /* 16 */
/* uint8_t reserved[0]; */
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
} blake2s_param;
typedef struct __blake2b_param
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint64_t node_offset; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
#pragma pack(pop)
/* Streaming API */
int blake2s_init( blake2s_state *S, const uint8_t outlen );
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
int blake2b_init( blake2b_state *S, const uint8_t outlen );
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen );
int blake2sp_init( blake2sp_state *S, const uint8_t outlen );
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen );
int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen );
int blake2bp_init( blake2bp_state *S, const uint8_t outlen );
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen );
/* Simple API */
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
return blake2b( out, in, key, outlen, inlen, keylen );
}
#if defined(__cplusplus)
}
#endif
#endif
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE2_H__
#define __BLAKE2B_LOAD_SSE2_H__
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#endif
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE41_H__
#define __BLAKE2B_LOAD_SSE41_H__
#define LOAD_MSG_0_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#endif
This diff is collapsed.
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_ROUND_H__
#define __BLAKE2B_ROUND_H__
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi64(x, c) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
#else
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
#endif
#else
/* ... */
#endif
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24); \
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63); \
#if defined(HAVE_SSSE3)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#else
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row4l;\
t1 = row2l;\
row4l = row3l;\
row3l = row3h;\
row3h = row4l;\
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row3l;\
row3l = row3h;\
row3h = t0;\
t0 = row2l;\
t1 = row4l;\
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
#endif
#if defined(HAVE_SSE41)
#include "blake2b-load-sse41.h"
#else
#include "blake2b-load-sse2.h"
#endif
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif
This diff is collapsed.
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE2_H__
#define __BLAKE2S_LOAD_SSE2_H__
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)
#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8)
#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9)
#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14)
#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10)
#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1)
#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12)
#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11)
#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8)
#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10)
#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14)
#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7)
#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9)
#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2)
#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6)
#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9)
#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0)
#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14)
#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1)
#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2)
#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12)
#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4)
#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13)
#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12)
#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5)
#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0)
#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7)
#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13)
#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11)
#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5)
#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0)
#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6)
#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15)
#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12)
#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2)
#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10)
#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2)
#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15)
#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11)
#endif
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE41_H__
#define __BLAKE2S_LOAD_SSE41_H__
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_2(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_0_3(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_4(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_1_1(buf) \
t0 = _mm_blend_epi16(m1, m2, 0x0C); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
#define LOAD_MSG_1_2(buf) \
t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
t1 = _mm_blend_epi16(m1,m3,0xC0); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_3(buf) \
t0 = _mm_slli_si128(m1, 4); \
t1 = _mm_blend_epi16(m2, t0, 0x30); \
t2 = _mm_blend_epi16(m0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_4(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_2_1(buf) \
t0 = _mm_unpackhi_epi32(m2,m3); \
t1 = _mm_blend_epi16(m3,m1,0x0C); \
t2 = _mm_blend_epi16(t0, t1, 0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_2_2(buf) \
t0 = _mm_unpacklo_epi32(m2,m0); \
t1 = _mm_blend_epi16(t0, m0, 0xF0); \
t2 = _mm_slli_si128(m3, 8); \
buf = _mm_blend_epi16(t1, t2, 0xC0);
#define LOAD_MSG_2_3(buf) \
t0 = _mm_blend_epi16(m0, m2, 0x3C); \
t1 = _mm_srli_si128(m1, 12); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_2_4(buf) \
t0 = _mm_slli_si128(m3, 4); \
t1 = _mm_blend_epi16(m0, m1, 0x33); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
#define LOAD_MSG_3_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(t0, m2); \
t2 = _mm_blend_epi16(t1, m3, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_3_2(buf) \
t0 = _mm_slli_si128(m2, 8); \
t1 = _mm_blend_epi16(m3,m0,0x0C); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_3_3(buf) \
t0 = _mm_blend_epi16(m0,m1,0x0F); \
t1 = _mm_blend_epi16(t0, m3, 0xC0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_3_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m2); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t1,t0);
#define LOAD_MSG_4_1(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,m2); \
t2 = _mm_blend_epi16(t0,t1,0x33); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_4_2(buf) \
t0 = _mm_unpackhi_epi64(m1,m3); \
t1 = _mm_unpacklo_epi64(m0,m1); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_4_3(buf) \
t0 = _mm_unpackhi_epi64(m3,m1); \
t1 = _mm_unpackhi_epi64(m2,m0); \
buf = _mm_blend_epi16(t1,t0,0x33);
#define LOAD_MSG_4_4(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_slli_si128(t0, 8); \
t2 = _mm_blend_epi16(t1,m3,0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_5_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpacklo_epi32(m0,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_5_2(buf) \
t0 = _mm_srli_si128(m2, 4); \
t1 = _mm_blend_epi16(m0,m3,0x03); \
buf = _mm_blend_epi16(t1,t0,0x3C);
#define LOAD_MSG_5_3(buf) \
t0 = _mm_blend_epi16(m1,m0,0x0C); \
t1 = _mm_srli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0,t1,0x30); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
#define LOAD_MSG_5_4(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_6_1(buf) \
t0 = _mm_slli_si128(m1, 12); \
t1 = _mm_blend_epi16(m0,m3,0x33); \
buf = _mm_blend_epi16(t1,t0,0xC0);
#define LOAD_MSG_6_2(buf) \
t0 = _mm_blend_epi16(m3,m2,0x30); \
t1 = _mm_srli_si128(m1, 4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));
#define LOAD_MSG_6_3(buf) \
t0 = _mm_unpacklo_epi64(m0,m2); \
t1 = _mm_srli_si128(m1, 4); \
buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
#define LOAD_MSG_6_4(buf) \
t0 = _mm_unpackhi_epi32(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,t0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_7_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_blend_epi16(t0,m3,0x0F); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
#define LOAD_MSG_7_2(buf) \
t0 = _mm_blend_epi16(m2,m3,0x30); \
t1 = _mm_srli_si128(m0,4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
#define LOAD_MSG_7_3(buf) \
t0 = _mm_unpackhi_epi64(m0,m3); \
t1 = _mm_unpacklo_epi64(m1,m2); \
t2 = _mm_blend_epi16(t0,t1,0x3C); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
#define LOAD_MSG_7_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_8_1(buf) \
t0 = _mm_unpackhi_epi32(m1,m3); \
t1 = _mm_unpacklo_epi64(t0,m0); \
t2 = _mm_blend_epi16(t1,m2,0xC0); \
buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_8_2(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_blend_epi16(m2,t0,0xF0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
#define LOAD_MSG_8_3(buf) \
t0 = _mm_blend_epi16(m2,m0,0x0C); \
t1 = _mm_slli_si128(t0,4); \
buf = _mm_blend_epi16(t1,m3,0x0F);
#define LOAD_MSG_8_4(buf) \
t0 = _mm_blend_epi16(m1,m0,0x30); \
buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_9_1(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_blend_epi16(m1,m2,0x30); \
t2 = _mm_blend_epi16(t1,t0,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
#define LOAD_MSG_9_2(buf) \
t0 = _mm_slli_si128(m0,4); \
t1 = _mm_blend_epi16(m1,t0,0xC0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_9_3(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_unpacklo_epi32(m2,m3); \
t2 = _mm_unpackhi_epi64(t0,t1); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
#define LOAD_MSG_9_4(buf) \
t0 = _mm_blend_epi16(m3,m2,0xC0); \
t1 = _mm_unpacklo_epi32(m0,m3); \
t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
#endif
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_XOP_H__
#define __BLAKE2S_LOAD_XOP_H__
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */
/* Basic VPPERM emulation, for testing purposes */
/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
const __m128i sixteen = _mm_set1_epi8(16);
const __m128i t0 = _mm_shuffle_epi8(src1, sel);
const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
_mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */
return _mm_blendv_epi8(t0, s1, mask);
}*/
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );
#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );
#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );
#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );
#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );
#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );
#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );
#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );
#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );
#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -214,6 +214,9 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_bisectmodule.c" />
<ClCompile Include="..\Modules\_blake2\blake2module.c" />
<ClCompile Include="..\Modules\_blake2\blake2b_impl.c" />
<ClCompile Include="..\Modules\_blake2\blake2s_impl.c" />
<ClCompile Include="..\Modules\_codecsmodule.c" />
<ClCompile Include="..\Modules\_collectionsmodule.c" />
<ClCompile Include="..\Modules\_csv.c" />
......
......@@ -449,6 +449,15 @@
<ClCompile Include="..\Modules\_bisectmodule.c">
<Filter>Modules</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_blake2\blake2module.c">
<Filter>Modules</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_blake2\blake2b_impl.c">
<Filter>Modules</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_blake2\blake2s_impl.c">
<Filter>Modules</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_codecsmodule.c">
<Filter>Modules</Filter>
</ClCompile>
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment