forked from OSchip/llvm-project
78 lines
1.8 KiB
C
78 lines
1.8 KiB
C
|
/*
 * Single-precision vector cos function.
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */
|
||
|
|
||
|
#include "mathlib.h"
|
||
|
#include "v_math.h"
|
||
|
#if V_SUPPORTED
|
||
|
|
||
|
/* Coefficients of the odd polynomial used to approximate sin(r) on the
   reduced interval.  They are stored from the highest-order term down to the
   lowest: the A9/A7/A5/A3 macros index this table as Poly[0]..Poly[3].  */
static const float Poly[] = {
  /* 1.886 ulp error */
  0x1.5b2e76p-19f,  /* Poly[0]: coefficient of r^9 (A9).  */
  -0x1.9f42eap-13f, /* Poly[1]: coefficient of r^7 (A7).  */
  0x1.110df4p-7f,   /* Poly[2]: coefficient of r^5 (A5).  */
  -0x1.555548p-3f,  /* Poly[3]: coefficient of r^3 (A3).  */
};
|
||
|
/* pi split into three float pieces of decreasing magnitude.  The range
   reduction subtracts n*Pi1, n*Pi2 and n*Pi3 in turn, one fused
   multiply-add each.  */
#define Pi1 v_f32 (0x1.921fb6p+1f)
#define Pi2 v_f32 (-0x1.777a5cp-24f)
#define Pi3 v_f32 (-0x1.ee59dap-49f)

/* Polynomial coefficients, named after the power of r they multiply.  */
#define A3 v_f32 (Poly[3])
#define A5 v_f32 (Poly[2])
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])

/* Lanes whose |x| bit pattern is >= that of 2^20 take the scalar fallback.  */
#define RangeVal v_f32 (0x1p20f)
/* 1/pi, used to scale |x| + pi/2 before rounding.  */
#define InvPi v_f32 (0x1.45f306p-2f)
/* 1.5 * 2^23: added inside the fma when computing n and subtracted again
   afterwards; the low bit of the biased value is used as the parity bit.  */
#define Shift v_f32 (0x1.8p+23f)
/* Clears the sign bit of a float bit pattern, giving |x|.  */
#define AbsMask v_u32 (0x7fffffff)
/* pi/2, the phase offset that lets cos be evaluated through a sin kernel.  */
#define HalfPi v_f32 (0x1.921fb6p0f)
|
||
|
|
||
|
VPCS_ATTR
|
||
|
static v_f32_t
|
||
|
specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
|
||
|
{
|
||
|
/* Fall back to scalar code. */
|
||
|
return v_call_f32 (cosf, x, y, cmp);
|
||
|
}
|
||
|
|
||
|
VPCS_ATTR
|
||
|
v_f32_t
|
||
|
V_NAME(cosf) (v_f32_t x)
|
||
|
{
|
||
|
v_f32_t n, r, r2, y;
|
||
|
v_u32_t odd, cmp;
|
||
|
|
||
|
r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
|
||
|
cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
|
||
|
|
||
|
/* n = rint((|x|+pi/2)/pi) - 0.5 */
|
||
|
n = v_fma_f32 (InvPi, r + HalfPi, Shift);
|
||
|
odd = v_as_u32_f32 (n) << 31;
|
||
|
n -= Shift;
|
||
|
n -= v_f32 (0.5f);
|
||
|
|
||
|
/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */
|
||
|
r = v_fma_f32 (-Pi1, n, r);
|
||
|
r = v_fma_f32 (-Pi2, n, r);
|
||
|
r = v_fma_f32 (-Pi3, n, r);
|
||
|
|
||
|
/* y = sin(r) */
|
||
|
r2 = r * r;
|
||
|
y = v_fma_f32 (A9, r2, A7);
|
||
|
y = v_fma_f32 (y, r2, A5);
|
||
|
y = v_fma_f32 (y, r2, A3);
|
||
|
y = v_fma_f32 (y * r2, r, r);
|
||
|
|
||
|
/* sign fix */
|
||
|
y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd);
|
||
|
|
||
|
if (unlikely (v_any_u32 (cmp)))
|
||
|
return specialcase (x, y, cmp);
|
||
|
return y;
|
||
|
}
|
||
|
VPCS_ALIAS
|
||
|
#endif
|