GH-100485: Convert from Fast2Sum to 2Sum (GH-100836)

2023-01-07 21:37:08 -06:00 · 2023-01-07 21:37:08 -06:00 · df3851fe4a
parent 951303fd85
commit df3851fe4a
1 changed files with 16 additions and 13 deletions
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@ -2847,7 +2847,7 @@ based on ideas from three sources:
 The double length routines allow for quite a bit of instruction
 level parallelism.  On a 3.22 Ghz Apple M1 Max, the incremental
-cost of increasing the input vector size by one is 6.25 nsec.
+cost of increasing the input vector size by one is 6.0 nsec.
 dl_zero() returns an extended precision zero
 dl_split() exactly splits a double into two half precision components.
@ -2860,22 +2860,25 @@ dl_to_d() converts from extended precision to double precision.
 typedef struct{ double hi; double lo; } DoubleLength;
 static const DoubleLength dl_zero = {0.0, 0.0};
 static inline DoubleLength
-dl_zero()
+twosum(double a, double b)
 {
-    return (DoubleLength) {0.0, 0.0};
+    double s = a + b;
    double ap = s - b;
    double bp = s - a;
    double da = a - ap;
    double db = b - bp;
    double t = da + db;
    return  (DoubleLength) {s, t};
 }
 static inline DoubleLength
 dl_add(DoubleLength total, double x)
 {
-    double s = total.hi + x;
+    DoubleLength s = twosum(total.hi, x);
-    double c = total.lo;
+    return (DoubleLength) {s.hi, total.lo + s.lo};
    if (fabs(total.hi) >= fabs(x)) {
        c += (total.hi - s) + x;
    } else {
        c += (x - s) + total.hi;
    }
    return (DoubleLength) {s, c};
 }
 static inline DoubleLength
@ -2941,7 +2944,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
    bool int_path_enabled = true, int_total_in_use = false;
    bool flt_path_enabled = true, flt_total_in_use = false;
    long int_total = 0;
-    DoubleLength flt_total = dl_zero();
+    DoubleLength flt_total = dl_zero;
    p_it = PyObject_GetIter(p);
    if (p_it == NULL) {
@ -3101,7 +3104,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
                Py_SETREF(total, new_total);
                new_total = NULL;
                Py_CLEAR(term_i);
-                flt_total = dl_zero();
+                flt_total = dl_zero;
                flt_total_in_use = false;
            }
        }