llvm-project/libclc/generic/lib/math/clc_fmod.cl

/*
 * Copyright (c) 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <clc/clc.h>

#include <math/clc_remainder.h>
#include "../clcmacro.h"
#include "config.h"
#include "math.h"

_CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y)
{
    int ux = as_int(x);
    int ax = ux & EXSIGNBIT_SP32;
    float xa = as_float(ax);
    int sx = ux ^ ax;
    int ex = ax >> EXPSHIFTBITS_SP32;

    int uy = as_int(y);
    int ay = uy & EXSIGNBIT_SP32;
    float ya = as_float(ay);
    int ey = ay >> EXPSHIFTBITS_SP32;

    float xr = as_float(0x3f800000 | (ax & 0x007fffff));
    float yr = as_float(0x3f800000 | (ay & 0x007fffff));
    int c;
    int k = ex - ey;

    while (k > 0) {
        c = xr >= yr;
        xr -= c ? yr : 0.0f;
        xr += xr;
        --k;
    }

    c = xr >= yr;
    xr -= c ? yr : 0.0f;

    int lt = ex < ey;

    xr = lt ? xa : xr;
    yr = lt ? ya : yr;


    float s = as_float(ey << EXPSHIFTBITS_SP32);
    xr *= lt ? 1.0f : s;

    c = ax == ay;
    xr = c ? 0.0f : xr;

    xr = as_float(sx ^ as_int(xr));

    c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0;
    xr = c ? as_float(QNANBITPATT_SP32) : xr;

    return xr;

}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);

#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
{
    ulong ux = as_ulong(x);
    ulong ax = ux & ~SIGNBIT_DP64;
    ulong xsgn = ux ^ ax;
    double dx = as_double(ax);
    int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
    int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);
    xexp1 = xexp < 1 ? xexp1 : xexp;

    ulong uy = as_ulong(y);
    ulong ay = uy & ~SIGNBIT_DP64;
    double dy = as_double(ay);
    int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
    int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);
    yexp1 = yexp < 1 ? yexp1 : yexp;

    // First assume |x| > |y|

    // Set ntimes to the number of times we need to do a
    // partial remainder. If the exponent of x is an exact multiple
    // of 53 larger than the exponent of y, and the mantissa of x is
    // less than the mantissa of y, ntimes will be one too large
    // but it doesn't matter - it just means that we'll go round
    // the loop below one extra time.
    int ntimes = max(0, (xexp1 - yexp1) / 53);
    double w =  ldexp(dy, ntimes * 53);
    w = ntimes == 0 ? dy : w;
    double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;

    // Each time round the loop we compute a partial remainder.
    // This is done by subtracting a large multiple of w
    // from x each time, where w is a scaled up version of y.
    // The subtraction must be performed exactly in quad
    // precision, though the result at each stage can
    // fit exactly in a double precision number.
    int i;
    double t, v, p, pp;

    for (i = 0; i < ntimes; i++) {
        // Compute integral multiplier
        t = trunc(dx / w);

        // Compute w * t in quad precision
        p = w * t;
        pp = fma(w, t, -p);

        // Subtract w * t from dx
        v = dx - p;
        dx = v + (((dx - v) - p) - pp);

        // If t was one too large, dx will be negative. Add back one w.
        dx += dx < 0.0 ? w : 0.0;

        // Scale w down by 2^(-53) for the next iteration
        w *= scale;
    }

    // One more time
    // Variable todd says whether the integer t is odd or not
    t = floor(dx / w);
    long lt = (long)t;
    int todd = lt & 1;

    p = w * t;
    pp = fma(w, t, -p);
    v = dx - p;
    dx = v + (((dx - v) - p) - pp);
    i = dx < 0.0;
    todd ^= i;
    dx += i ? w : 0.0;

    // At this point, dx lies in the range [0,dy)
    double ret = as_double(xsgn ^ as_ulong(dx));
    dx = as_double(ax);

    // Now handle |x| == |y|
    int c = dx == dy;
    t = as_double(xsgn);
    ret = c ? t : ret;

    // Next, handle |x| < |y|
    c = dx < dy;
    ret = c ? x : ret;

    // We don't need anything special for |x| == 0

    // |y| is 0
    c = dy == 0.0;
    ret = c ? as_double(QNANBITPATT_DP64) : ret;

    // y is +-Inf, NaN
    c = yexp > BIASEDEMAX_DP64;
    t = y == y ? x : y;
    ret = c ? t : ret;

    // x is +=Inf, NaN
    c = xexp > BIASEDEMAX_DP64;
    ret = c ? as_double(QNANBITPATT_DP64) : ret;

    return ret;
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_fmod, double, double);
#endif
fmod: Port from amd_builtins Uses only denormal path for fp32. Passes CTS on carrizo and turks. v2: whitespace fix Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Reviewer: Aaron Watry <awatry@gmail.com> llvm-svn: 329433 2018-04-07 01:43:08 +08:00			`/*`
			`* Copyright (c) 2014 Advanced Micro Devices, Inc.`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a copy`
			`* of this software and associated documentation files (the "Software"), to deal`
			`* in the Software without restriction, including without limitation the rights`
			`* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`* copies of the Software, and to permit persons to whom the Software is`
			`* furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in`
			`* all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`* THE SOFTWARE.`
			`*/`

			`#include <clc/clc.h>`

			`#include <math/clc_remainder.h>`
			`#include "../clcmacro.h"`
			`#include "config.h"`
			`#include "math.h"`

			`_CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y)`
			`{`
			`int ux = as_int(x);`
			`int ax = ux & EXSIGNBIT_SP32;`
			`float xa = as_float(ax);`
			`int sx = ux ^ ax;`
			`int ex = ax >> EXPSHIFTBITS_SP32;`

			`int uy = as_int(y);`
			`int ay = uy & EXSIGNBIT_SP32;`
			`float ya = as_float(ay);`
			`int ey = ay >> EXPSHIFTBITS_SP32;`

			`float xr = as_float(0x3f800000 \| (ax & 0x007fffff));`
			`float yr = as_float(0x3f800000 \| (ay & 0x007fffff));`
			`int c;`
			`int k = ex - ey;`

			`while (k > 0) {`
			`c = xr >= yr;`
			`xr -= c ? yr : 0.0f;`
			`xr += xr;`
			`--k;`
			`}`

			`c = xr >= yr;`
			`xr -= c ? yr : 0.0f;`

			`int lt = ex < ey;`

			`xr = lt ? xa : xr;`
			`yr = lt ? ya : yr;`


			`float s = as_float(ey << EXPSHIFTBITS_SP32);`
			`xr *= lt ? 1.0f : s;`

			`c = ax == ay;`
			`xr = c ? 0.0f : xr;`

			`xr = as_float(sx ^ as_int(xr));`

			`c = ax > PINFBITPATT_SP32 \| ay > PINFBITPATT_SP32 \| ax == PINFBITPATT_SP32 \| ay == 0;`
			`xr = c ? as_float(QNANBITPATT_SP32) : xr;`

			`return xr;`

			`}`
			`_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);`

			`#ifdef cl_khr_fp64`
			`_CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)`
			`{`
			`ulong ux = as_ulong(x);`
			`ulong ax = ux & ~SIGNBIT_DP64;`
			`ulong xsgn = ux ^ ax;`
			`double dx = as_double(ax);`
			`int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);`
			`int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);`
			`xexp1 = xexp < 1 ? xexp1 : xexp;`

			`ulong uy = as_ulong(y);`
			`ulong ay = uy & ~SIGNBIT_DP64;`
			`double dy = as_double(ay);`
			`int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);`
			`int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);`
			`yexp1 = yexp < 1 ? yexp1 : yexp;`

			`// First assume \|x\| > \|y\|`

			`// Set ntimes to the number of times we need to do a`
			`// partial remainder. If the exponent of x is an exact multiple`
			`// of 53 larger than the exponent of y, and the mantissa of x is`
			`// less than the mantissa of y, ntimes will be one too large`
			`// but it doesn't matter - it just means that we'll go round`
			`// the loop below one extra time.`
			`int ntimes = max(0, (xexp1 - yexp1) / 53);`
			`double w = ldexp(dy, ntimes * 53);`
			`w = ntimes == 0 ? dy : w;`
			`double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;`

			`// Each time round the loop we compute a partial remainder.`
			`// This is done by subtracting a large multiple of w`
			`// from x each time, where w is a scaled up version of y.`
			`// The subtraction must be performed exactly in quad`
			`// precision, though the result at each stage can`
			`// fit exactly in a double precision number.`
			`int i;`
			`double t, v, p, pp;`

			`for (i = 0; i < ntimes; i++) {`
			`// Compute integral multiplier`
			`t = trunc(dx / w);`

			`// Compute w * t in quad precision`
			`p = w * t;`
			`pp = fma(w, t, -p);`

			`// Subtract w * t from dx`
			`v = dx - p;`
			`dx = v + (((dx - v) - p) - pp);`

			`// If t was one too large, dx will be negative. Add back one w.`
			`dx += dx < 0.0 ? w : 0.0;`

			`// Scale w down by 2^(-53) for the next iteration`
			`w *= scale;`
			`}`

			`// One more time`
			`// Variable todd says whether the integer t is odd or not`
			`t = floor(dx / w);`
			`long lt = (long)t;`
			`int todd = lt & 1;`

			`p = w * t;`
			`pp = fma(w, t, -p);`
			`v = dx - p;`
			`dx = v + (((dx - v) - p) - pp);`
			`i = dx < 0.0;`
			`todd ^= i;`
			`dx += i ? w : 0.0;`

			`// At this point, dx lies in the range [0,dy)`
			`double ret = as_double(xsgn ^ as_ulong(dx));`
			`dx = as_double(ax);`

			`// Now handle \|x\| == \|y\|`
			`int c = dx == dy;`
			`t = as_double(xsgn);`
			`ret = c ? t : ret;`

			`// Next, handle \|x\| < \|y\|`
			`c = dx < dy;`
			`ret = c ? x : ret;`

			`// We don't need anything special for \|x\| == 0`

			`// \|y\| is 0`
			`c = dy == 0.0;`
			`ret = c ? as_double(QNANBITPATT_DP64) : ret;`

			`// y is +-Inf, NaN`
			`c = yexp > BIASEDEMAX_DP64;`
			`t = y == y ? x : y;`
			`ret = c ? t : ret;`

			`// x is +=Inf, NaN`
			`c = xexp > BIASEDEMAX_DP64;`
			`ret = c ? as_double(QNANBITPATT_DP64) : ret;`

			`return ret;`
			`}`
			`_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_fmod, double, double);`
			`#endif`