Commit f1d8f24a authored by Andrew M. Kuchling

Removed Tools/perfecthash, per python-dev discussion

parent b40c463a
#! /usr/bin/env python
import sys
import string
import perfect_hash
# Driver script built on top of perfect_hash.py.
# Input:  the UnicodeData.txt file, available from
#         ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
# Output: a hash table mapping Unicode Character Name ->
#         unicode code space value.

# Seeds that determine which hash function is tried first.
# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24.
f1Seed, f2Seed = 0x64fc2234, 0x8db7d737

# Multipliers handed to perfect_hash.generate_hash():
#   minC  -- when this isn't None, C is reset back to initC to keep
#            searching for a solution instead of being increased
#            continually.
#            NOTE(review): the original comment called this the *maximum*
#            allowed multiplier although the name says "min" -- confirm
#            against perfect_hash.py.
#   initC -- initial multiplier for trying to find a perfect hash function.
minC = initC = 1.7875

# Names used in the generated C source.
moduleName, structName = "ucnhash", "_Py_UCNHashAPI"
dataArrayName, dataArrayType = "aucn", "_Py_UnicodeCharacterName"

# Names of the generated output files.
headerFileName, cFileName = "ucnhash.h", "ucnhash.c"

# Filled in by main(): `keys` collects (name, index) tuples and `hashData`
# maps a character name to its integer code value.
keys = []
hashData = {}
def generateOutputFiles(perfHash, hashData):
    """Write the generated C header and C source file for the hash table.

    perfHash -- object providing generate_header(), generate_code() and
                generate_graph(); main() passes the result of
                perfect_hash.generate_hash().
    hashData -- dictionary mapping every name stored in the module-level
                `keys` list to its integer value.

    The header goes to headerFileName and the code to cFileName.  Each
    file is closed explicitly (also when a write fails) so the generated
    text is flushed to disk instead of depending on garbage collection.
    """
    header = perfHash.generate_header(structName)
    header = header + """
typedef struct
{
const char *pszUCN;
Py_UCS4 value;
} _Py_UnicodeCharacterName;
"""
    code = perfHash.generate_code(moduleName,
                                  dataArrayName,
                                  dataArrayType,
                                  structName)

    out = open(headerFileName, "w")
    try:
        out.write(header)
    finally:
        out.close()

    out = open(cFileName, "w")
    try:
        out.write("#include \"%s\"\n" % headerFileName)
        out.write(code)
        perfHash.generate_graph(out)
        out.write("""
static const _Py_UnicodeCharacterName aucn[] =
{
""")
        # One initializer per key, in the same order as the `keys` list.
        for entry in keys:
            name = entry[0]
            v = hashData[name]
            out.write(' { "' + name + '", ' + hex(v) + " }," + "\n")
        out.write("};\n\n")
    finally:
        out.close()

    sys.stderr.write('\nGenerated output files: \n')
    sys.stderr.write('%s\n%s\n' % (headerFileName, cFileName))
def main():
    """Read UnicodeData.txt (path in sys.argv[1]) and generate the output.

    Every input line is split on ';'.  The first field is the code value
    in hexadecimal and the second field is the character name.  Names
    starting with '<' are skipped.  Each remaining name is uppercased,
    appended to the module-level `keys` list together with its sequence
    number, and recorded in `hashData` under that same uppercased name,
    so the lookups done by generateOutputFiles() always succeed.

    A line with fewer than two fields, or with an empty name, is reported
    on stderr with its real input line number and the script exits with
    status 1.
    """
    # Suck in UnicodeData.txt and spit out the generated files.
    inputFile = open(sys.argv[1], 'r')
    try:
        lineNumber = 0      # position in the input file, for error messages
        hashcode = 0        # sequence number of the next accepted key
        while 1:
            line = inputFile.readline()
            if line == "":
                break
            lineNumber = lineNumber + 1
            fields = line.split(';')
            if len(fields) < 2 or not fields[1].strip():
                sys.stderr.write('Ill-formated line!\n')
                sys.stderr.write('line #: %d\n' % lineNumber)
                sys.exit(1)
            data, key = fields[:2]
            # force the name to uppercase; the same spelling is used for
            # both the key list and the hashData dictionary
            key = key.strip().upper()
            # Any name starting with '<' is a control, or start/end character,
            # so skip it...
            if key[0] == "<":
                continue
            keys.append((key, hashcode))
            hashcode = hashcode + 1
            hashData[key] = int(data, 16)
    finally:
        # Also runs when sys.exit() is raised above.
        inputFile.close()

    sys.stderr.write('%i key/hash pairs read\n' % len(keys))
    perfHash = perfect_hash.generate_hash(keys, 1,
                                          minC, initC,
                                          f1Seed, f2Seed,
                                          # increment, tries
                                          0.0025, 50)
    generateOutputFiles(perfHash, hashData)
# Script entry point: expects the path of UnicodeData.txt as the only
# command line argument.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # Write the usage text straight to stderr (no rebinding of
        # sys.stdout) and exit with a non-zero status, since a missing
        # argument is an error.
        sys.stderr.write('Usage: %s <input filename>\n' % sys.argv[0])
        sys.stderr.write(' The input file needs to be UnicodeData.txt\n')
        sys.exit(2)
    main()
This diff is collapsed.
#include <Python.h>
/* hash(ulSeed, cchSeed, string, cHashElements) -> int
 *
 * Hashes `string` starting from ulSeed with the multiply-by-1000003 /
 * xor-with-byte loop, xors in (string length + cchSeed), and reduces the
 * result modulo cHashElements, treating the 32-bit value as signed so
 * the result matches Python's modulo on a 32-bit int.
 *
 * Raises TypeError when called without arguments or when argument 3 is
 * not a string, and ValueError when cHashElements is zero (the modulo
 * would otherwise divide by zero).
 *
 * NOTE: the "l" format units store signed longs into the unsigned
 * variables below, so negative arguments wrap around.  `kw` is unused.
 */
static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
{
    PyStringObject *a;
    register int len;
    register unsigned char *p;
    register unsigned long x;
    unsigned long ulSeed;
    unsigned long cchSeed;
    unsigned long cHashElements;

    /* The method table registers this function with flags 0; with that
     * calling convention a call without arguments passes args == NULL,
     * which PyArg_ParseTuple must never see.
     */
    if (args == NULL)
    {
        PyErr_SetString(PyExc_TypeError, "hash() requires 4 arguments");
        return NULL;
    }

    if (!PyArg_ParseTuple(args, "llOl:hash",
                          &ulSeed, &cchSeed, &a, &cHashElements))
        return NULL;

    if (!PyString_Check(a))
    {
        PyErr_SetString(PyExc_TypeError, "arg 3 needs to be a string");
        return NULL;
    }

    if (cHashElements == 0)
    {
        PyErr_SetString(PyExc_ValueError,
                        "arg 4 (number of hash elements) must not be zero");
        return NULL;
    }

    len = a->ob_size;
    p = (unsigned char *) a->ob_sval;
    x = ulSeed;
    while (--len >= 0)
    {
        /* (1000003 * x) ^ *p++
         * translated to handle > 32 bit longs
         */
        x = (0xf4243 * x);
        x = x & 0xFFFFFFFF;
        x = x ^ *p++;
    }
    x ^= a->ob_size + cchSeed;

    /* 0xFFFFFFFF is -1 as a 32-bit signed value; it is replaced by -2. */
    if (x == 0xFFFFFFFF)
        x = 0xfffffffe;

    if (x & 0x80000000)
    {
        /* Emulate Python 32-bit signed (2's complement)
         * modulo operation
         */
        x = (~x & 0xFFFFFFFF) + 1;      /* magnitude of the negative value */
        x %= cHashElements;
        if (x != 0)
        {
            /* result is cHashElements - remainder, computed in 32 bits */
            x = x + (~cHashElements & 0xFFFFFFFF) + 1;
            x = (~x & 0xFFFFFFFF) + 1;
        }
    }
    else
        x %= cHashElements;

    return PyInt_FromLong((long)x);
}
/* calcSeed(string) -> int
 *
 * Computes a 32-bit seed from `string`: starts with the first byte
 * shifted left by 7, then applies the multiply-by-1000003 /
 * xor-with-byte step to every byte of the string.
 *
 * The method table registers this function with flags 0, so `args` is
 * the single argument object itself rather than a tuple, and it is NULL
 * when the function is called without arguments.  Raises TypeError when
 * the argument is missing or is not a string.  `kw` is unused.
 */
static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
{
    PyStringObject *a;
    register int len;
    register unsigned char *p;
    register unsigned long x;

    /* Check for NULL first: PyString_Check() dereferences its argument. */
    if (args == NULL || !PyString_Check(args))
    {
        PyErr_SetString(PyExc_TypeError, "arg 1 expected a string, but didn't get it.");
        return NULL;
    }

    a = (PyStringObject *)args;
    len = a->ob_size;
    p = (unsigned char *) a->ob_sval;
    x = (*p << 7) & 0xFFFFFFFF;
    while (--len >= 0)
    {
        /* (1000003 * x) ^ *p++
         * translated to handle > 32 bit longs
         */
        x = (0xf4243 * x);
        x = x & 0xFFFFFFFF;
        x = x ^ *p++;
    }

    return PyInt_FromLong((long)x);
}
/* Method table of the perfhash module, passed to Py_InitModule4() by
 * initperfhash().  Each entry is { Python name, C function, flags, doc }.
 * Both functions are registered with flags 0 and without a doc string;
 * calcSeed() relies on this by treating `args` as the argument object
 * itself, while hash() parses `args` as a tuple.
 */
static struct PyMethodDef hashMethods[] = {
{ "calcSeed", calcSeed, 0, NULL },
{ "hash", hashFunction, 0, NULL },
{ NULL, NULL, 0, NULL } /* sentinel */
};
/* Module initialisation function for "perfhash": registers the functions
 * listed in hashMethods.  If the module cannot be created the interpreter
 * is terminated through Py_FatalError().  Exported from the DLL when
 * built with the Microsoft compiler.
 */
#ifdef _MSC_VER
_declspec(dllexport)
#endif
void initperfhash(void)
{
    PyObject *module = Py_InitModule4("perfhash", hashMethods,
                                      NULL, NULL, PYTHON_API_VERSION);

    if (module != NULL)
        return;

    Py_FatalError("can't initialize module perfhash");
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment