提升除法性能，参考

SHA-1: d996d648f333b04ae3da3c5853120f6f37601fb2 * Simplify the inner loop of numeric division in div_var().
2022-10-17 18:03:30 +08:00 · 2022-10-17 18:03:30 +08:00 · cf96c5fa83
parent 709e1dadf5
commit cf96c5fa83
1 changed files with 244 additions and 119 deletions
--- a/src/common/backend/utils/adt/numeric.cpp
+++ b/src/common/backend/utils/adt/numeric.cpp
@ -236,6 +236,7 @@ static void sub_var(NumericVar* var1, NumericVar* var2, NumericVar* result);
 static void mul_var(NumericVar* var1, NumericVar* var2, NumericVar* result, int rscale);
 static void div_var(NumericVar* var1, NumericVar* var2, NumericVar* result, int rscale, bool round);
 static void div_var_fast(NumericVar* var1, NumericVar* var2, NumericVar* result, int rscale, bool round);
+static void div_var_int(const NumericVar *var, int ival, int ival_weight, NumericVar *result, int rscale, bool round);
 static int select_div_scale(NumericVar* var1, NumericVar* var2);
 static void mod_var(NumericVar* var1, NumericVar* var2, NumericVar* result);
 static void ceil_var(NumericVar* var, NumericVar* result);
@ -5454,11 +5455,36 @@ static void div_var(NumericVar* var1, NumericVar* var2, NumericVar* result, int
     */
    if (var2ndigits == 0 || var2->digits[0] == 0)
        ereport(ERROR, (errcode(ERRCODE_DIVISION_BY_ZERO), errmsg("division by zero")));
+    /*
+     * If the divisor has just one or two digits, delegate to div_var_int(),
+     * which uses fast short division.
+     */
+    if (var2ndigits <= 2)
+    {
+        int idivisor;
+        int idivisor_weight;
+
+        idivisor = var2->digits[0];
+        idivisor_weight = var2->weight;
+        if (var2ndigits == 2)
+        {
+            idivisor = idivisor * NBASE + var2->digits[1];
+            idivisor_weight--;
+        }
+        if (var2->sign == NUMERIC_NEG)
+            idivisor = -idivisor;
+
+        div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
+        return;
+    }

    /*
-     * Now result zero check
+     * Otherwise, perform full long division.
     */
-    if (var1ndigits == 0) {
+
+    /* Result zero check */
+    if (var1ndigits == 0)
+    {
        zero_var(result);
        result->dscale = rscale;
        return;
@ -5515,141 +5541,130 @@ static void div_var(NumericVar* var1, NumericVar* var2, NumericVar* result, int
    alloc_var(result, res_ndigits);
    res_digits = result->digits;

-    if (var2ndigits == 1) {
-        /*
-         * If there's only a single divisor digit, we can use a fast path (cf.
-         * Knuth section 4.3.1 exercise 16).
-         */
-        divisor1 = divisor[1];
+    /*
+     * The full multiple-place algorithm is taken from Knuth volume 2,
+     * Algorithm 4.3.1D.
+     *
+     * We need the first divisor digit to be >= NBASE/2.  If it isn't,
+     * make it so by scaling up both the divisor and dividend by the
+     * factor "d".	(The reason for allocating dividend[0] above is to
+     * leave room for possible carry here.)
+     */
+    if (divisor[1] < HALF_NBASE) {
+        int d = NBASE / (divisor[1] + 1);
+
        carry = 0;
-        for (i = 0; i < res_ndigits; i++) {
-            carry = carry * NBASE + dividend[i + 1];
-            res_digits[i] = carry / divisor1;
-            carry = carry % divisor1;
+        for (i = var2ndigits; i > 0; i--) {
+            carry += divisor[i] * d;
+            divisor[i] = carry % NBASE;
+            carry = carry / NBASE;
        }
-    } else {
-        /*
-         * The full multiple-place algorithm is taken from Knuth volume 2,
-         * Algorithm 4.3.1D.
-         *
-         * We need the first divisor digit to be >= NBASE/2.  If it isn't,
-         * make it so by scaling up both the divisor and dividend by the
-         * factor "d".	(The reason for allocating dividend[0] above is to
-         * leave room for possible carry here.)
-         */
-        if (divisor[1] < HALF_NBASE) {
-            int d = NBASE / (divisor[1] + 1);
+        Assert(carry == 0);
+        carry = 0;
+        /* at this point only var1ndigits of dividend can be nonzero */
+        for (i = var1ndigits; i >= 0; i--) {
+            carry += dividend[i] * d;
+            dividend[i] = carry % NBASE;
+            carry = carry / NBASE;
+        }
+        Assert(carry == 0);
+        Assert(divisor[1] >= HALF_NBASE);
+    }
+    /* First 2 divisor digits are used repeatedly in main loop */
+    divisor1 = divisor[1];
+    divisor2 = divisor[2];

-            carry = 0;
-            for (i = var2ndigits; i > 0; i--) {
-                carry += divisor[i] * d;
-                divisor[i] = carry % NBASE;
-                carry = carry / NBASE;
-            }
-            Assert(carry == 0);
-            carry = 0;
-            /* at this point only var1ndigits of dividend can be nonzero */
-            for (i = var1ndigits; i >= 0; i--) {
-                carry += dividend[i] * d;
-                dividend[i] = carry % NBASE;
-                carry = carry / NBASE;
-            }
-            Assert(carry == 0);
-            Assert(divisor[1] >= HALF_NBASE);
-        }
-        /* First 2 divisor digits are used repeatedly in main loop */
-        divisor1 = divisor[1];
-        divisor2 = divisor[2];
+    /*
+     * Begin the main loop.  Each iteration of this loop produces the j'th
+     * quotient digit by dividing dividend[j .. j + var2ndigits] by the
+     * divisor; this is essentially the same as the common manual
+     * procedure for long division.
+     */
+    for (j = 0; j < res_ndigits; j++) {
+        /* Estimate quotient digit from the first two dividend digits */
+        int next2digits = dividend[j] * NBASE + dividend[j + 1];
+        int qhat;

        /*
-         * Begin the main loop.  Each iteration of this loop produces the j'th
-         * quotient digit by dividing dividend[j .. j + var2ndigits] by the
-         * divisor; this is essentially the same as the common manual
-         * procedure for long division.
+         * If next2digits are 0, then quotient digit must be 0 and there's
+         * no need to adjust the working dividend.	It's worth testing
+         * here to fall out ASAP when processing trailing zeroes in a
+         * dividend.
         */
-        for (j = 0; j < res_ndigits; j++) {
-            /* Estimate quotient digit from the first two dividend digits */
-            int next2digits = dividend[j] * NBASE + dividend[j + 1];
-            int qhat;
+        if (next2digits == 0) {
+            res_digits[j] = 0;
+            continue;
+        }
+
+        if (dividend[j] == divisor1)
+            qhat = NBASE - 1;
+        else
+            qhat = next2digits / divisor1;
+
+        /*
+         * Adjust quotient digit if it's too large.  Knuth proves that
+         * after this step, the quotient digit will be either correct or
+         * just one too large.	(Note: it's OK to use dividend[j+2] here
+         * because we know the divisor length is at least 2.)
+         */
+        while (divisor2 * qhat > (next2digits - qhat * divisor1) * NBASE + dividend[j + 2])
+            qhat--;
+
+        /* As above, need do nothing more when quotient digit is 0 */
+        if (qhat > 0) {
+            NumericDigit *dividend_j = &dividend[j];

            /*
-             * If next2digits are 0, then quotient digit must be 0 and there's
-             * no need to adjust the working dividend.	It's worth testing
-             * here to fall out ASAP when processing trailing zeroes in a
-             * dividend.
-             */
-            if (next2digits == 0) {
-                res_digits[j] = 0;
-                continue;
+             * Multiply the divisor by qhat, and subtract that from the
+             * working dividend.  The multiplication and subtraction are
+             * folded together here, noting that qhat <= NBASE (since it might
+             * be one too large), and so the intermediate result "tmp_result"
+             * is in the range [-NBASE^2, NBASE - 1], and "borrow" is in the
+             * range [0, NBASE].
+            */
+            borrow = 0;
+            for (i = var2ndigits; i >= 0; i--)
+            {
+                int tmp_result;
+
+                tmp_result = dividend_j[i] - borrow - divisor[i] * qhat;
+                borrow = (NBASE - 1 - tmp_result) / NBASE;
+                dividend_j[i] = tmp_result + borrow * NBASE;
            }

-            if (dividend[j] == divisor1)
-                qhat = NBASE - 1;
-            else
-                qhat = next2digits / divisor1;
-
            /*
-             * Adjust quotient digit if it's too large.  Knuth proves that
-             * after this step, the quotient digit will be either correct or
-             * just one too large.	(Note: it's OK to use dividend[j+2] here
-             * because we know the divisor length is at least 2.)
+             * If we got a borrow out of the top dividend digit, then indeed
+             * qhat was one too large.  Fix it, and add back the divisor to
+             * correct the working dividend.  (Knuth proves that this will
+             * occur only about 3/NBASE of the time; hence, it's a good idea
+             * to test this code with small NBASE to be sure this section gets
+             * exercised.)
             */
-            while (divisor2 * qhat > (next2digits - qhat * divisor1) * NBASE + dividend[j + 2])
+            if (borrow)
+            {
                qhat--;
-
-            /* As above, need do nothing more when quotient digit is 0 */
-            if (qhat > 0) {
-                /*
-                 * Multiply the divisor by qhat, and subtract that from the
-                 * working dividend.  "carry" tracks the multiplication,
-                 * "borrow" the subtraction (could we fold these together?)
-                 */
                carry = 0;
-                borrow = 0;
-                for (i = var2ndigits; i >= 0; i--) {
-                    carry += divisor[i] * qhat;
-                    borrow -= carry % NBASE;
-                    carry = carry / NBASE;
-                    borrow += dividend[j + i];
-                    if (borrow < 0) {
-                        dividend[j + i] = borrow + NBASE;
-                        borrow = -1;
-                    } else {
-                        dividend[j + i] = borrow;
-                        borrow = 0;
+                for (i = var2ndigits; i >= 0; i--)
+                {
+                    carry += dividend_j[i] + divisor[i];
+                    if (carry >= NBASE)
+                    {
+                        dividend_j[i] = carry - NBASE;
+                        carry = 1;
+                    }
+                    else
+                    {
+                        dividend_j[i] = carry;
+                        carry = 0;
                    }
                }
-                Assert(carry == 0);
-
-                /*
-                 * If we got a borrow out of the top dividend digit, then
-                 * indeed qhat was one too large.  Fix it, and add back the
-                 * divisor to correct the working dividend.  (Knuth proves
-                 * that this will occur only about 3/NBASE of the time; hence,
-                 * it's a good idea to test this code with small NBASE to be
-                 * sure this section gets exercised.)
-                 */
-                if (borrow) {
-                    qhat--;
-                    carry = 0;
-                    for (i = var2ndigits; i >= 0; i--) {
-                        carry += dividend[j + i] + divisor[i];
-                        if (carry >= NBASE) {
-                            dividend[j + i] = carry - NBASE;
-                            carry = 1;
-                        } else {
-                            dividend[j + i] = carry;
-                            carry = 0;
-                        }
-                    }
-                    /* A carry should occur here to cancel the borrow above */
-                    Assert(carry == 1);
-                }
+                /* A carry should occur here to cancel the borrow above */
+                Assert(carry == 1);
            }
-
-            /* And we're done with this quotient digit */
-            res_digits[j] = qhat;
        }
+
+        /* And we're done with this quotient digit */
+        res_digits[j] = qhat;
    }

    pfree_ext(dividend);
@ -5910,6 +5925,116 @@ static void div_var_fast(NumericVar* var1, NumericVar* var2, NumericVar* result,
    strip_var(result);
 }

+/*
+ * div_var_int() -
+ *
+ *	Divide a numeric variable by a 32-bit integer with the specified weight.
+ *	The quotient var / (ival * NBASE^ival_weight) is stored in result.
+ */
+static void
+div_var_int(const NumericVar *var, int ival, int ival_weight,
+			NumericVar *result, int rscale, bool round)
+{
+	NumericDigit *var_digits = var->digits;
+	int			var_ndigits = var->ndigits;
+	int			res_sign;
+	int			res_weight;
+	int			res_ndigits;
+	NumericDigit *res_buf;
+	NumericDigit *res_digits;
+	uint32		divisor;
+	int			i;
+
+	/* Guard against division by zero */
+	if (ival == 0)
+        ereport(ERROR, (errcode(ERRCODE_DIVISION_BY_ZERO), errmsg("division by zero")));
+
+	/* Result zero check */
+	if (var_ndigits == 0)
+	{
+		zero_var(result);
+		result->dscale = rscale;
+		return;
+	}
+
+	/*
+	 * Determine the result sign, weight and number of digits to calculate.
+	 * The weight figured here is correct if the emitted quotient has no
+	 * leading zero digits; otherwise strip_var() will fix things up.
+	 */
+	if (var->sign == NUMERIC_POS)
+		res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
+	else
+		res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
+	res_weight = var->weight - ival_weight;
+	/* The number of accurate result digits we need to produce: */
+	res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
+	/* ... but always at least 1 */
+	res_ndigits = Max(res_ndigits, 1);
+	/* If rounding needed, figure one more digit to ensure correct result */
+	if (round)
+		res_ndigits++;
+
+	res_buf = digitbuf_alloc(res_ndigits + 1);
+	res_buf[0] = 0;				/* spare digit for later rounding */
+	res_digits = res_buf + 1;
+
+	/*
+	 * Now compute the quotient digits.  This is the short division algorithm
+	 * described in Knuth volume 2, section 4.3.1 exercise 16, except that we
+	 * allow the divisor to exceed the internal base.
+	 *
+	 * In this algorithm, the carry from one digit to the next is at most
+	 * divisor - 1.  Therefore, while processing the next digit, carry may
+	 * become as large as divisor * NBASE - 1, and so it requires a 64-bit
+	 * integer if this exceeds UINT_MAX.
+	 */
+	divisor = Abs(ival);
+
+	if (divisor <= UINT_MAX / NBASE)
+	{
+		/* carry cannot overflow 32 bits */
+		uint32		carry = 0;
+
+		for (i = 0; i < res_ndigits; i++)
+		{
+			carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+			res_digits[i] = (NumericDigit) (carry / divisor);
+			carry = carry % divisor;
+		}
+	}
+	else
+	{
+		/* carry may exceed 32 bits */
+		uint64		carry = 0;
+
+		for (i = 0; i < res_ndigits; i++)
+		{
+			carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+			res_digits[i] = (NumericDigit) (carry / divisor);
+			carry = carry % divisor;
+		}
+	}
+
+	/* Store the quotient in result */
+	digitbuf_free(result->buf);
+	result->ndigits = res_ndigits;
+	result->buf = res_buf;
+	result->digits = res_digits;
+	result->weight = res_weight;
+	result->sign = res_sign;
+
+	/* Round or truncate to target rscale (and set result->dscale) */
+	if (round)
+		round_var(result, rscale);
+	else
+		trunc_var(result, rscale);
+
+	/* Strip leading/trailing zeroes */
+	strip_var(result);
+}
+
+
 /*
 * Default scale selection for division
 *