Commit c630e104 authored by Raymond Hettinger, committed by GitHub

Factor-out common code. Also, optimize common cases by preallocating space on the stack. GH-8738

Improves speed by 9 to 10ns per call.
parent 13990745
...@@ -2032,10 +2032,10 @@ math_fmod_impl(PyObject *module, double x, double y) ...@@ -2032,10 +2032,10 @@ math_fmod_impl(PyObject *module, double x, double y)
} }
/* /*
Given an *n* length *vec* of non-negative, non-nan, non-inf values Given an *n* length *vec* of non-negative values
where *max* is the largest value in the vector, compute: where *max* is the largest value in the vector, compute:
sum((x / max) ** 2 for x in vec) max * sqrt(sum((x / max) ** 2 for x in vec))
When a maximum value is found, it is swapped to the end. This When a maximum value is found, it is swapped to the end. This
lets us skip one loop iteration and just add 1.0 at the end. lets us skip one loop iteration and just add 1.0 at the end.
...@@ -2045,19 +2045,31 @@ Kahan summation is used to improve accuracy. The *csum* ...@@ -2045,19 +2045,31 @@ Kahan summation is used to improve accuracy. The *csum*
variable tracks the cumulative sum and *frac* tracks variable tracks the cumulative sum and *frac* tracks
fractional round-off error for the most recent addition. fractional round-off error for the most recent addition.
The value of the *max* variable must be present in *vec*
or should be equal to 0.0 when n==0. Likewise, *max* will
be INF if an infinity is present in *vec*.
The *found_nan* variable indicates whether some member of
the *vec* is a NaN.
*/ */
static inline double static inline double
scaled_vector_squared(Py_ssize_t n, double *vec, double max) vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{ {
double x, csum = 0.0, oldcsum, frac = 0.0; double x, csum = 0.0, oldcsum, frac = 0.0;
Py_ssize_t i; Py_ssize_t i;
if (Py_IS_INFINITY(max)) {
return max;
}
if (found_nan) {
return Py_NAN;
}
if (max == 0.0) { if (max == 0.0) {
return 0.0; return 0.0;
} }
assert(n > 0); assert(n > 0);
for (i=0 ; i<n-1 ; i++) { for (i=0 ; i < n-1 ; i++) {
x = vec[i]; x = vec[i];
if (x == max) { if (x == max) {
x = vec[n-1]; x = vec[n-1];
...@@ -2071,9 +2083,11 @@ scaled_vector_squared(Py_ssize_t n, double *vec, double max) ...@@ -2071,9 +2083,11 @@ scaled_vector_squared(Py_ssize_t n, double *vec, double max)
} }
assert(vec[n-1] == max); assert(vec[n-1] == max);
csum += 1.0 - frac; csum += 1.0 - frac;
return csum; return max * sqrt(csum);
} }
#define NUM_STACK_ELEMS 16
/*[clinic input] /*[clinic input]
math.dist math.dist
...@@ -2095,11 +2109,12 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) ...@@ -2095,11 +2109,12 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
/*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/ /*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/
{ {
PyObject *item; PyObject *item;
double *diffs;
double max = 0.0; double max = 0.0;
double x, px, qx, result; double x, px, qx, result;
Py_ssize_t i, m, n; Py_ssize_t i, m, n;
int found_nan = 0; int found_nan = 0;
double diffs_on_stack[NUM_STACK_ELEMS];
double *diffs = diffs_on_stack;
m = PyTuple_GET_SIZE(p); m = PyTuple_GET_SIZE(p);
n = PyTuple_GET_SIZE(q); n = PyTuple_GET_SIZE(q);
...@@ -2109,22 +2124,22 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) ...@@ -2109,22 +2124,22 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
return NULL; return NULL;
} }
diffs = (double *) PyObject_Malloc(n * sizeof(double)); if (n > NUM_STACK_ELEMS) {
if (diffs == NULL) { diffs = (double *) PyObject_Malloc(n * sizeof(double));
return NULL; if (diffs == NULL) {
return NULL;
}
} }
for (i=0 ; i<n ; i++) { for (i=0 ; i<n ; i++) {
item = PyTuple_GET_ITEM(p, i); item = PyTuple_GET_ITEM(p, i);
px = PyFloat_AsDouble(item); px = PyFloat_AsDouble(item);
if (px == -1.0 && PyErr_Occurred()) { if (px == -1.0 && PyErr_Occurred()) {
PyObject_Free(diffs); goto error_exit;
return NULL;
} }
item = PyTuple_GET_ITEM(q, i); item = PyTuple_GET_ITEM(q, i);
qx = PyFloat_AsDouble(item); qx = PyFloat_AsDouble(item);
if (qx == -1.0 && PyErr_Occurred()) { if (qx == -1.0 && PyErr_Occurred()) {
PyObject_Free(diffs); goto error_exit;
return NULL;
} }
x = fabs(px - qx); x = fabs(px - qx);
diffs[i] = x; diffs[i] = x;
...@@ -2133,19 +2148,17 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) ...@@ -2133,19 +2148,17 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
max = x; max = x;
} }
} }
if (Py_IS_INFINITY(max)) { result = vector_norm(n, diffs, max, found_nan);
result = max; if (diffs != diffs_on_stack) {
goto done; PyObject_Free(diffs);
}
if (found_nan) {
result = Py_NAN;
goto done;
} }
result = max * sqrt(scaled_vector_squared(n, diffs, max));
done:
PyObject_Free(diffs);
return PyFloat_FromDouble(result); return PyFloat_FromDouble(result);
error_exit:
if (diffs != diffs_on_stack) {
PyObject_Free(diffs);
}
return NULL;
} }
/* AC: cannot convert yet, waiting for *args support */ /* AC: cannot convert yet, waiting for *args support */
...@@ -2154,21 +2167,23 @@ math_hypot(PyObject *self, PyObject *args) ...@@ -2154,21 +2167,23 @@ math_hypot(PyObject *self, PyObject *args)
{ {
Py_ssize_t i, n; Py_ssize_t i, n;
PyObject *item; PyObject *item;
double *coordinates;
double max = 0.0; double max = 0.0;
double x, result; double x, result;
int found_nan = 0; int found_nan = 0;
double coord_on_stack[NUM_STACK_ELEMS];
double *coordinates = coord_on_stack;
n = PyTuple_GET_SIZE(args); n = PyTuple_GET_SIZE(args);
coordinates = (double *) PyObject_Malloc(n * sizeof(double)); if (n > NUM_STACK_ELEMS) {
if (coordinates == NULL) coordinates = (double *) PyObject_Malloc(n * sizeof(double));
return NULL; if (coordinates == NULL)
return NULL;
}
for (i=0 ; i<n ; i++) { for (i=0 ; i<n ; i++) {
item = PyTuple_GET_ITEM(args, i); item = PyTuple_GET_ITEM(args, i);
x = PyFloat_AsDouble(item); x = PyFloat_AsDouble(item);
if (x == -1.0 && PyErr_Occurred()) { if (x == -1.0 && PyErr_Occurred()) {
PyObject_Free(coordinates); goto error_exit;
return NULL;
} }
x = fabs(x); x = fabs(x);
coordinates[i] = x; coordinates[i] = x;
...@@ -2177,21 +2192,21 @@ math_hypot(PyObject *self, PyObject *args) ...@@ -2177,21 +2192,21 @@ math_hypot(PyObject *self, PyObject *args)
max = x; max = x;
} }
} }
if (Py_IS_INFINITY(max)) { result = vector_norm(n, coordinates, max, found_nan);
result = max; if (coordinates != coord_on_stack) {
goto done; PyObject_Free(coordinates);
} }
if (found_nan) {
result = Py_NAN;
goto done;
}
result = max * sqrt(scaled_vector_squared(n, coordinates, max));
done:
PyObject_Free(coordinates);
return PyFloat_FromDouble(result); return PyFloat_FromDouble(result);
error_exit:
if (coordinates != coord_on_stack) {
PyObject_Free(coordinates);
}
return NULL;
} }
#undef NUM_STACK_ELEMS
PyDoc_STRVAR(math_hypot_doc, PyDoc_STRVAR(math_hypot_doc,
"hypot(*coordinates) -> value\n\n\ "hypot(*coordinates) -> value\n\n\
Multidimensional Euclidean distance from the origin to a point.\n\ Multidimensional Euclidean distance from the origin to a point.\n\
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment