mirror of https://github.com/python/cpython.git
GH-100485: Convert from Fast2Sum to 2Sum (GH-100836)
This commit is contained in:
parent
951303fd85
commit
df3851fe4a
|
@ -2847,7 +2847,7 @@ based on ideas from three sources:
|
||||||
|
|
||||||
The double length routines allow for quite a bit of instruction
|
The double length routines allow for quite a bit of instruction
|
||||||
level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental
|
level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental
|
||||||
cost of increasing the input vector size by one is 6.25 nsec.
|
cost of increasing the input vector size by one is 6.0 nsec.
|
||||||
|
|
||||||
dl_zero() returns an extended precision zero
|
dl_zero() returns an extended precision zero
|
||||||
dl_split() exactly splits a double into two half precision components.
|
dl_split() exactly splits a double into two half precision components.
|
||||||
|
@ -2860,22 +2860,25 @@ dl_to_d() converts from extended precision to double precision.
|
||||||
|
|
||||||
typedef struct{ double hi; double lo; } DoubleLength;
|
typedef struct{ double hi; double lo; } DoubleLength;
|
||||||
|
|
||||||
|
static const DoubleLength dl_zero = {0.0, 0.0};
|
||||||
|
|
||||||
static inline DoubleLength
|
static inline DoubleLength
|
||||||
dl_zero()
|
twosum(double a, double b)
|
||||||
{
|
{
|
||||||
return (DoubleLength) {0.0, 0.0};
|
double s = a + b;
|
||||||
|
double ap = s - b;
|
||||||
|
double bp = s - a;
|
||||||
|
double da = a - ap;
|
||||||
|
double db = b - bp;
|
||||||
|
double t = da + db;
|
||||||
|
return (DoubleLength) {s, t};
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline DoubleLength
|
static inline DoubleLength
|
||||||
dl_add(DoubleLength total, double x)
|
dl_add(DoubleLength total, double x)
|
||||||
{
|
{
|
||||||
double s = total.hi + x;
|
DoubleLength s = twosum(total.hi, x);
|
||||||
double c = total.lo;
|
return (DoubleLength) {s.hi, total.lo + s.lo};
|
||||||
if (fabs(total.hi) >= fabs(x)) {
|
|
||||||
c += (total.hi - s) + x;
|
|
||||||
} else {
|
|
||||||
c += (x - s) + total.hi;
|
|
||||||
}
|
|
||||||
return (DoubleLength) {s, c};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline DoubleLength
|
static inline DoubleLength
|
||||||
|
@ -2941,7 +2944,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
bool int_path_enabled = true, int_total_in_use = false;
|
bool int_path_enabled = true, int_total_in_use = false;
|
||||||
bool flt_path_enabled = true, flt_total_in_use = false;
|
bool flt_path_enabled = true, flt_total_in_use = false;
|
||||||
long int_total = 0;
|
long int_total = 0;
|
||||||
DoubleLength flt_total = dl_zero();
|
DoubleLength flt_total = dl_zero;
|
||||||
|
|
||||||
p_it = PyObject_GetIter(p);
|
p_it = PyObject_GetIter(p);
|
||||||
if (p_it == NULL) {
|
if (p_it == NULL) {
|
||||||
|
@ -3101,7 +3104,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
Py_SETREF(total, new_total);
|
Py_SETREF(total, new_total);
|
||||||
new_total = NULL;
|
new_total = NULL;
|
||||||
Py_CLEAR(term_i);
|
Py_CLEAR(term_i);
|
||||||
flt_total = dl_zero();
|
flt_total = dl_zero;
|
||||||
flt_total_in_use = false;
|
flt_total_in_use = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue