Commit 346058c0 authored by Kevin Locke

edit_distance: Rename ED_STACK_ELEMS to ED_STACK_DIST_VALS

The previous name was misleading, since it does not define the number of
elements (ed_elem) on the stack, but rather the number of distance
values (ed_dist).  Rename to make this more clear and add more
documentation about what it does and how best to define it.

Note:  This is an API change for custom-compiled versions, but since the
module has only been included for a couple days I don't think it's worth
a back-compat #ifdef at this point.
Signed-off-by: Kevin Locke <kevin@kevinlocke.name>
parent 18cbdae8
...@@ -109,18 +109,39 @@ ...@@ -109,18 +109,39 @@
# define ED_HASH_ELEM(e) ((unsigned char)e) # define ED_HASH_ELEM(e) ((unsigned char)e)
/** Maximum value that can be returned from #ED_HASH_ELEM */ /** Maximum value that can be returned from #ED_HASH_ELEM */
# define ED_HASH_MAX UCHAR_MAX # define ED_HASH_MAX UCHAR_MAX
/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack? */ /** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack?
* @see #ED_STACK_DIST_VALS
*/
# define ED_HASH_ON_STACK # define ED_HASH_ON_STACK
#endif #endif
#ifndef ED_STACK_ELEMS #ifndef ED_STACK_DIST_VALS
/** Maximum number of elements that will be allocated on the stack. /** Maximum number of ::ed_dist values that will be allocated on the stack.
*
* The edit distance algorithms which use dynamic programming (all currently
* supported algorithms) can store intermediate values on the stack to avoid
* the overhead of calling @c malloc. This macro defines the maximum number
* of intermediate distance values which can be stored on the stack for a
* single function call. It should be large enough to cover common cases,
* where possible, and small enough to avoid overflowing the stack or frame
* size limits. The algorithms have the following requirements:
* *
* Note: The threshold does not need to be tight since with * - ed_measure::EDIT_DISTANCE_LCS and ed_measure::EDIT_DISTANCE_LEV require
* <code>ED_STACK_ELEMS > ~100</code> algorithm cost will dominate @c malloc * @c min(slen,tlen) values when #ED_COST_IS_SYMMETRIC is defined, @c slen
* otherwise.
* - ed_measure::EDIT_DISTANCE_RDL requires @c 2*min(slen,tlen) values when
* #ED_COST_IS_SYMMETRIC is defined, @c 2*slen otherwise.
* - ed_measure::EDIT_DISTANCE_DL requires @c (slen+2)*(tlen+2) values (in
* addition to the #ED_HASH_MAX values stored on the stack if
* #ED_HASH_ON_STACK is defined).
*
* This value does not need to be a tight bound, since when @c slen*tlen is
* greater than around 10,000 the algorithm cost will dominate the @c malloc
* cost anyway. * cost anyway.
*
* @see #ED_HASH_ON_STACK
*/ */
# define ED_STACK_ELEMS 512 # define ED_STACK_DIST_VALS 512
#endif #endif
#endif #endif
...@@ -15,11 +15,11 @@ ed_dist edit_distance_dl(const ed_elem *src, ed_size slen, ...@@ -15,11 +15,11 @@ ed_dist edit_distance_dl(const ed_elem *src, ed_size slen,
{ {
/* Optimization: Avoid malloc when distance matrix can fit on the stack. /* Optimization: Avoid malloc when distance matrix can fit on the stack.
*/ */
ed_dist stackdist[ED_STACK_ELEMS]; ed_dist stackdist[ED_STACK_DIST_VALS];
/* Lowrance-Wagner distance matrix, in row-major order. */ /* Lowrance-Wagner distance matrix, in row-major order. */
size_t matsize = ((size_t)slen + 2) * (tlen + 2); size_t matsize = ((size_t)slen + 2) * (tlen + 2);
ed_dist *distmem = matsize <= ED_STACK_ELEMS ? stackdist : ed_dist *distmem = matsize <= ED_STACK_DIST_VALS ? stackdist :
malloc(matsize * sizeof(ed_dist)); malloc(matsize * sizeof(ed_dist));
ed_dist *dist = distmem; ed_dist *dist = distmem;
......
...@@ -16,10 +16,10 @@ ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen, ...@@ -16,10 +16,10 @@ ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen,
/* Optimization: Avoid malloc when row of distance matrix can fit on /* Optimization: Avoid malloc when row of distance matrix can fit on
* the stack. * the stack.
*/ */
ed_dist stackdist[ED_STACK_ELEMS]; ed_dist stackdist[ED_STACK_DIST_VALS];
/* One row of the Wagner-Fischer distance matrix. */ /* One row of the Wagner-Fischer distance matrix. */
ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist : ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
malloc((slen + 1) * sizeof(ed_dist)); malloc((slen + 1) * sizeof(ed_dist));
/* Initialize row with cost to delete src[0..i-1] */ /* Initialize row with cost to delete src[0..i-1] */
......
...@@ -16,10 +16,10 @@ ed_dist edit_distance_lev(const ed_elem *src, ed_size slen, ...@@ -16,10 +16,10 @@ ed_dist edit_distance_lev(const ed_elem *src, ed_size slen,
/* Optimization: Avoid malloc when row of distance matrix can fit on /* Optimization: Avoid malloc when row of distance matrix can fit on
* the stack. * the stack.
*/ */
ed_dist stackdist[ED_STACK_ELEMS]; ed_dist stackdist[ED_STACK_DIST_VALS];
/* One row of the Wagner-Fischer distance matrix. */ /* One row of the Wagner-Fischer distance matrix. */
ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist : ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
malloc((slen + 1) * sizeof(ed_dist)); malloc((slen + 1) * sizeof(ed_dist));
/* Initialize row with cost to delete src[0..i-1] */ /* Initialize row with cost to delete src[0..i-1] */
......
...@@ -16,11 +16,11 @@ ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen, ...@@ -16,11 +16,11 @@ ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen,
/* Optimization: Avoid malloc when required rows of distance matrix can /* Optimization: Avoid malloc when required rows of distance matrix can
* fit on the stack. * fit on the stack.
*/ */
ed_dist stackdist[ED_STACK_ELEMS]; ed_dist stackdist[ED_STACK_DIST_VALS];
/* Two rows of the Wagner-Fischer distance matrix. */ /* Two rows of the Wagner-Fischer distance matrix. */
ed_dist *distmem, *dist, *prevdist; ed_dist *distmem, *dist, *prevdist;
if (slen < ED_STACK_ELEMS / 2) { if (slen < ED_STACK_DIST_VALS / 2) {
distmem = stackdist; distmem = stackdist;
dist = distmem; dist = distmem;
prevdist = distmem + slen + 1; prevdist = distmem + slen + 1;
......
...@@ -224,14 +224,14 @@ static void test_dl(void) ...@@ -224,14 +224,14 @@ static void test_dl(void)
static void test_mem_use(void) static void test_mem_use(void)
{ {
char tgt[] = "BC"; char tgt[] = "BC";
char src[ED_STACK_ELEMS + 1]; char src[ED_STACK_DIST_VALS + 1];
for (size_t i = 0; i < ED_STACK_ELEMS; ++i) { for (size_t i = 0; i < ED_STACK_DIST_VALS; ++i) {
src[i] = (char)('A' + (i % 26)); src[i] = (char)('A' + (i % 26));
} }
src[ED_STACK_ELEMS] = '\0'; src[ED_STACK_DIST_VALS] = '\0';
for (ed_size tlen = 1; tlen < 3; ++tlen) { for (ed_size tlen = 1; tlen < 3; ++tlen) {
ed_size slen = ED_STACK_ELEMS; ed_size slen = ED_STACK_DIST_VALS;
/* Above threshold, causes allocation */ /* Above threshold, causes allocation */
ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen, ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen,
"edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u", "edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment