forked from OSchip/llvm-project
add strict float for round operation
Differential Revision: https://reviews.llvm.org/D72026
This commit is contained in:
parent
d2bb8c16e7
commit
8af492ade1
|
@ -897,27 +897,50 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
|
|||
continue;
|
||||
}
|
||||
case ISD::FCEIL:
|
||||
case ISD::STRICT_FCEIL:
|
||||
case ISD::FFLOOR:
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::FTRUNC:
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::FNEARBYINT:
|
||||
case ISD::FRINT: {
|
||||
case ISD::STRICT_FNEARBYINT:
|
||||
case ISD::FRINT:
|
||||
case ISD::STRICT_FRINT: {
|
||||
// Replace fp rounding with their X86 specific equivalent so we don't
|
||||
// need 2 sets of patterns.
|
||||
unsigned Imm;
|
||||
switch (N->getOpcode()) {
|
||||
default: llvm_unreachable("Unexpected opcode!");
|
||||
case ISD::STRICT_FCEIL:
|
||||
case ISD::FCEIL: Imm = 0xA; break;
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::FFLOOR: Imm = 0x9; break;
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::FTRUNC: Imm = 0xB; break;
|
||||
case ISD::STRICT_FNEARBYINT:
|
||||
case ISD::FNEARBYINT: Imm = 0xC; break;
|
||||
case ISD::STRICT_FRINT:
|
||||
case ISD::FRINT: Imm = 0x4; break;
|
||||
}
|
||||
SDLoc dl(N);
|
||||
SDValue Res = CurDAG->getNode(
|
||||
X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
|
||||
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
|
||||
bool IsStrict = N->isStrictFPOpcode();
|
||||
SDValue Res;
|
||||
if (IsStrict)
|
||||
Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
|
||||
{N->getValueType(0), MVT::Other},
|
||||
{N->getOperand(0), N->getOperand(1),
|
||||
CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
|
||||
else
|
||||
Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
|
||||
N->getOperand(0),
|
||||
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
|
||||
--I;
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
|
||||
if (IsStrict) {
|
||||
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
|
||||
SDValue To[] = {Res.getValue(0), Res.getValue(1)};
|
||||
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
|
||||
} else
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
|
||||
++I;
|
||||
CurDAG->DeleteNode(N);
|
||||
continue;
|
||||
|
|
|
@ -1068,11 +1068,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
|
||||
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
|
||||
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
|
||||
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FRINT, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
|
||||
setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
|
||||
setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
|
||||
setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FRINT, RoundedTy, Legal);
|
||||
setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
|
||||
setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
|
||||
|
@ -1144,14 +1149,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
: &X86::VR256RegClass);
|
||||
|
||||
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
|
||||
setOperationAction(ISD::FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::FRINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEG, VT, Custom);
|
||||
setOperationAction(ISD::FABS, VT, Custom);
|
||||
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
|
||||
setOperationAction(ISD::FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::FRINT, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEG, VT, Custom);
|
||||
setOperationAction(ISD::FABS, VT, Custom);
|
||||
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
|
||||
}
|
||||
|
||||
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
|
||||
|
@ -1503,11 +1513,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
|
||||
|
||||
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
|
||||
setOperationAction(ISD::FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::FRINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, VT, Legal);
|
||||
setOperationAction(ISD::FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
|
||||
setOperationAction(ISD::FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
|
||||
setOperationAction(ISD::FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
|
||||
setOperationAction(ISD::FRINT, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
|
||||
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
}
|
||||
|
@ -29650,6 +29665,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
|
||||
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
|
||||
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
|
||||
case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE";
|
||||
case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
|
||||
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
|
||||
case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";
|
||||
|
|
|
@ -424,7 +424,7 @@ namespace llvm {
|
|||
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
|
||||
// Also used by the legacy (V)ROUND intrinsics where we mask out the
|
||||
// scaling part of the immediate.
|
||||
VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
|
||||
VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, STRICT_VRNDSCALE,
|
||||
// Tests Types Of a FP Values for packed types.
|
||||
VFPCLASS,
|
||||
// Tests Types Of a FP Values for scalar types.
|
||||
|
|
|
@ -9019,13 +9019,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
|
||||
def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src1, timm:$src2))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
|
||||
def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src1, timm:$src2))>;
|
||||
}
|
||||
|
@ -10290,7 +10290,7 @@ defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56
|
|||
X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
|
||||
X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
|
||||
X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
|
||||
X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
|
||||
|
|
|
@ -466,6 +466,12 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
|
|||
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
|
||||
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
|
||||
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
|
||||
def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
|
||||
[SDNPHasChain]>;
|
||||
def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
|
||||
[(X86strict_VRndScale node:$src1, node:$src2),
|
||||
(X86VRndScale node:$src1, node:$src2)]>;
|
||||
|
||||
def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
|
||||
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
|
||||
def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;
|
||||
|
|
|
@ -5540,19 +5540,19 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
|
||||
// Intrinsic form
|
||||
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
|
||||
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
|
||||
loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
|
||||
loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
|
||||
loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
|
||||
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
|
||||
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
|
||||
loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
|
||||
loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
|
||||
loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
}
|
||||
|
@ -5565,25 +5565,25 @@ let Predicates = [UseAVX] in {
|
|||
}
|
||||
|
||||
let Predicates = [UseAVX] in {
|
||||
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
|
||||
def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
|
||||
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
|
||||
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
|
||||
def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
|
||||
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseAVX, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
|
||||
memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
|
||||
memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
|
||||
memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
|
||||
memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
|
||||
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
|
||||
|
||||
|
@ -5592,16 +5592,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
|
|||
v4f32, v2f64, X86RndScales>;
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
|
||||
def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
|
||||
(ROUNDSSr FR32:$src1, timm:$src2)>;
|
||||
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
|
||||
def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
|
||||
(ROUNDSDr FR64:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE41, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
(ROUNDSSm addr:$src1, timm:$src2)>;
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
(ROUNDSDm addr:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,474 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64
|
||||
|
||||
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
|
||||
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
|
||||
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
|
||||
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
|
||||
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
|
||||
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
|
||||
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
|
||||
|
||||
define float @fceil32(float %f) #0 {
|
||||
; SSE41-X86-LABEL: fceil32:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movss %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: flds (%esp)
|
||||
; SSE41-X86-NEXT: popl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: fceil32:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: fceil32:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: flds (%esp)
|
||||
; AVX-X86-NEXT: popl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: fceil32:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call float @llvm.experimental.constrained.ceil.f32(
|
||||
float %f, metadata !"fpexcept.strict")
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @fceilf64(double %f) #0 {
|
||||
; SSE41-X86-LABEL: fceilf64:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE41-X86-NEXT: movl %esp, %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE41-X86-NEXT: andl $-8, %esp
|
||||
; SSE41-X86-NEXT: subl $8, %esp
|
||||
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: fldl (%esp)
|
||||
; SSE41-X86-NEXT: movl %ebp, %esp
|
||||
; SSE41-X86-NEXT: popl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: fceilf64:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: fceilf64:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-X86-NEXT: movl %esp, %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-X86-NEXT: andl $-8, %esp
|
||||
; AVX-X86-NEXT: subl $8, %esp
|
||||
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: fldl (%esp)
|
||||
; AVX-X86-NEXT: movl %ebp, %esp
|
||||
; AVX-X86-NEXT: popl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: fceilf64:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call double @llvm.experimental.constrained.ceil.f64(
|
||||
double %f, metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define float @ffloor32(float %f) #0 {
|
||||
; SSE41-X86-LABEL: ffloor32:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movss %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: flds (%esp)
|
||||
; SSE41-X86-NEXT: popl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: ffloor32:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: ffloor32:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: flds (%esp)
|
||||
; AVX-X86-NEXT: popl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: ffloor32:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call float @llvm.experimental.constrained.floor.f32(
|
||||
float %f, metadata !"fpexcept.strict")
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @ffloorf64(double %f) #0 {
|
||||
; SSE41-X86-LABEL: ffloorf64:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE41-X86-NEXT: movl %esp, %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE41-X86-NEXT: andl $-8, %esp
|
||||
; SSE41-X86-NEXT: subl $8, %esp
|
||||
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: fldl (%esp)
|
||||
; SSE41-X86-NEXT: movl %ebp, %esp
|
||||
; SSE41-X86-NEXT: popl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: ffloorf64:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: ffloorf64:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-X86-NEXT: movl %esp, %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-X86-NEXT: andl $-8, %esp
|
||||
; AVX-X86-NEXT: subl $8, %esp
|
||||
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: fldl (%esp)
|
||||
; AVX-X86-NEXT: movl %ebp, %esp
|
||||
; AVX-X86-NEXT: popl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: ffloorf64:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call double @llvm.experimental.constrained.floor.f64(
|
||||
double %f, metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define float @ftrunc32(float %f) #0 {
|
||||
; SSE41-X86-LABEL: ftrunc32:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movss %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: flds (%esp)
|
||||
; SSE41-X86-NEXT: popl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: ftrunc32:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: ftrunc32:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: flds (%esp)
|
||||
; AVX-X86-NEXT: popl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: ftrunc32:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call float @llvm.experimental.constrained.trunc.f32(
|
||||
float %f, metadata !"fpexcept.strict")
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @ftruncf64(double %f) #0 {
|
||||
; SSE41-X86-LABEL: ftruncf64:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE41-X86-NEXT: movl %esp, %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE41-X86-NEXT: andl $-8, %esp
|
||||
; SSE41-X86-NEXT: subl $8, %esp
|
||||
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: fldl (%esp)
|
||||
; SSE41-X86-NEXT: movl %ebp, %esp
|
||||
; SSE41-X86-NEXT: popl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: ftruncf64:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: ftruncf64:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-X86-NEXT: movl %esp, %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-X86-NEXT: andl $-8, %esp
|
||||
; AVX-X86-NEXT: subl $8, %esp
|
||||
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: fldl (%esp)
|
||||
; AVX-X86-NEXT: movl %ebp, %esp
|
||||
; AVX-X86-NEXT: popl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: ftruncf64:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call double @llvm.experimental.constrained.trunc.f64(
|
||||
double %f, metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define float @frint32(float %f) #0 {
|
||||
; SSE41-X86-LABEL: frint32:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movss %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: flds (%esp)
|
||||
; SSE41-X86-NEXT: popl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: frint32:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: frint32:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: flds (%esp)
|
||||
; AVX-X86-NEXT: popl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: frint32:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call float @llvm.experimental.constrained.rint.f32(
|
||||
float %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @frintf64(double %f) #0 {
|
||||
; SSE41-X86-LABEL: frintf64:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE41-X86-NEXT: movl %esp, %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE41-X86-NEXT: andl $-8, %esp
|
||||
; SSE41-X86-NEXT: subl $8, %esp
|
||||
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: fldl (%esp)
|
||||
; SSE41-X86-NEXT: movl %ebp, %esp
|
||||
; SSE41-X86-NEXT: popl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: frintf64:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: frintf64:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-X86-NEXT: movl %esp, %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-X86-NEXT: andl $-8, %esp
|
||||
; AVX-X86-NEXT: subl $8, %esp
|
||||
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: fldl (%esp)
|
||||
; AVX-X86-NEXT: movl %ebp, %esp
|
||||
; AVX-X86-NEXT: popl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: frintf64:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call double @llvm.experimental.constrained.rint.f64(
|
||||
double %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define float @fnearbyint32(float %f) #0 {
|
||||
; SSE41-X86-LABEL: fnearbyint32:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movss %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: flds (%esp)
|
||||
; SSE41-X86-NEXT: popl %eax
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: fnearbyint32:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: fnearbyint32:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: flds (%esp)
|
||||
; AVX-X86-NEXT: popl %eax
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: fnearbyint32:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call float @llvm.experimental.constrained.nearbyint.f32(
|
||||
float %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @fnearbyintf64(double %f) #0 {
|
||||
; SSE41-X86-LABEL: fnearbyintf64:
|
||||
; SSE41-X86: # %bb.0:
|
||||
; SSE41-X86-NEXT: pushl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE41-X86-NEXT: movl %esp, %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE41-X86-NEXT: andl $-8, %esp
|
||||
; SSE41-X86-NEXT: subl $8, %esp
|
||||
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
|
||||
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
|
||||
; SSE41-X86-NEXT: fldl (%esp)
|
||||
; SSE41-X86-NEXT: movl %ebp, %esp
|
||||
; SSE41-X86-NEXT: popl %ebp
|
||||
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE41-X86-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: fnearbyintf64:
|
||||
; SSE41-X64: # %bb.0:
|
||||
; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X86-LABEL: fnearbyintf64:
|
||||
; AVX-X86: # %bb.0:
|
||||
; AVX-X86-NEXT: pushl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-X86-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-X86-NEXT: movl %esp, %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-X86-NEXT: andl $-8, %esp
|
||||
; AVX-X86-NEXT: subl $8, %esp
|
||||
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
|
||||
; AVX-X86-NEXT: fldl (%esp)
|
||||
; AVX-X86-NEXT: movl %ebp, %esp
|
||||
; AVX-X86-NEXT: popl %ebp
|
||||
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-X86-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: fnearbyintf64:
|
||||
; AVX-X64: # %bb.0:
|
||||
; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
|
||||
; AVX-X64-NEXT: retq
|
||||
%res = call double @llvm.experimental.constrained.nearbyint.f64(
|
||||
double %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
|
@ -18,6 +18,16 @@ declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float
|
|||
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.rint.v8f32(<8 x float>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
|
||||
|
||||
define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
|
||||
; CHECK-LABEL: f1:
|
||||
|
@ -178,4 +188,111 @@ define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
|
|||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
define <8 x float> @fceilv8f32(<8 x float> %f) #0 {
|
||||
; CHECK-LABEL: fceilv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundps $10, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x float> @llvm.experimental.constrained.ceil.v8f32(
|
||||
<8 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x double> @fceilv4f64(<4 x double> %f) #0 {
|
||||
; CHECK-LABEL: fceilv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(
|
||||
<4 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
define <8 x float> @ffloorv8f32(<8 x float> %f) #0 {
|
||||
; CHECK-LABEL: ffloorv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundps $9, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x float> @llvm.experimental.constrained.floor.v8f32(
|
||||
<8 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x double> @ffloorv4f64(<4 x double> %f) #0 {
|
||||
; CHECK-LABEL: ffloorv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x double> @llvm.experimental.constrained.floor.v4f64(
|
||||
<4 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
||||
define <8 x float> @ftruncv8f32(<8 x float> %f) #0 {
|
||||
; CHECK-LABEL: ftruncv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x float> @llvm.experimental.constrained.trunc.v8f32(
|
||||
<8 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x double> @ftruncv4f64(<4 x double> %f) #0 {
|
||||
; CHECK-LABEL: ftruncv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(
|
||||
<4 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
||||
define <8 x float> @frintv8f32(<8 x float> %f) #0 {
|
||||
; CHECK-LABEL: frintv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundps $4, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x float> @llvm.experimental.constrained.rint.v8f32(
|
||||
<8 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x double> @frintv4f64(<4 x double> %f) #0 {
|
||||
; CHECK-LABEL: frintv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
|
||||
<4 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
||||
define <8 x float> @fnearbyintv8f32(<8 x float> %f) #0 {
|
||||
; CHECK-LABEL: fnearbyintv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundps $12, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(
|
||||
<8 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x double> @fnearbyintv4f64(<4 x double> %f) #0 {
|
||||
; CHECK-LABEL: fnearbyintv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
|
||||
<4 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
|
|
@ -16,6 +16,17 @@ declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float
|
|||
declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double>, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double>, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double>, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float>, metadata, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double>, metadata, metadata)
|
||||
declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float>, metadata, metadata)
|
||||
declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata)
|
||||
|
||||
|
||||
define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
|
||||
; CHECK-LABEL: f1:
|
||||
|
@ -175,4 +186,98 @@ define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
|
|||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @strict_vector_fceil_v16f32(<16 x float> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_fceil_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @strict_vector_fceil_v8f64(<8 x double> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_fceil_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @strict_vector_ffloor_v16f32(<16 x float> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_ffloor_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @strict_vector_ffloor_v8f64(<8 x double> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_ffloor_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @strict_vector_ftrunc_v16f32(<16 x float> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_ftrunc_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @strict_vector_ftrunc_v8f64(<8 x double> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_ftrunc_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @strict_vector_frint_v16f32(<16 x float> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_frint_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @strict_vector_frint_v8f64(<8 x double> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_frint_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @strict_vector_fnearbyint_v16f32(<16 x float> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_fnearbyint_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @strict_vector_fnearbyint_v8f64(<8 x double> %f) #0 {
|
||||
; CHECK-LABEL: strict_vector_fnearbyint_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
|
||||
|
||||
declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
|
||||
|
||||
define <4 x float> @fceilv4f32(<4 x float> %f) #0 {
|
||||
; SSE41-LABEL: fceilv4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundps $10, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: fceilv4f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundps $10, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
|
||||
<4 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @fceilv2f64(<2 x double> %f) #0 {
|
||||
; SSE41-LABEL: fceilv2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundpd $10, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: fceilv2f64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
|
||||
<2 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <4 x float> @ffloorv4f32(<4 x float> %f) #0 {
|
||||
; SSE41-LABEL: ffloorv4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundps $9, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: ffloorv4f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundps $9, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
|
||||
<4 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @ffloorv2f64(<2 x double> %f) #0 {
|
||||
; SSE41-LABEL: ffloorv2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundpd $9, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: ffloorv2f64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundpd $9, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
|
||||
<2 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <4 x float> @ftruncv4f32(<4 x float> %f) #0 {
|
||||
; SSE41-LABEL: ftruncv4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: ftruncv4f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundps $11, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
|
||||
<4 x float> %f, metadata !"fpexcept.strict")
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @ftruncv2f64(<2 x double> %f) #0 {
|
||||
; SSE41-LABEL: ftruncv2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: ftruncv2f64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundpd $11, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
|
||||
<2 x double> %f, metadata !"fpexcept.strict")
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <4 x float> @frintv4f32(<4 x float> %f) #0 {
|
||||
; SSE41-LABEL: frintv4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundps $4, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: frintv4f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundps $4, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
|
||||
<4 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @frintv2f64(<2 x double> %f) #0 {
|
||||
; SSE41-LABEL: frintv2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundpd $4, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: frintv2f64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
|
||||
<2 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <4 x float> @fnearbyintv4f32(<4 x float> %f) #0 {
|
||||
; SSE41-LABEL: fnearbyintv4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundps $12, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: fnearbyintv4f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundps $12, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
|
||||
<4 x float> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @fnearbyintv2f64(<2 x double> %f) #0 {
|
||||
; SSE41-LABEL: fnearbyintv2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: roundpd $12, %xmm0, %xmm0
|
||||
; SSE41-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX-LABEL: fnearbyintv2f64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
|
||||
<2 x double> %f,
|
||||
metadata !"round.dynamic", metadata !"fpexcept.strict")
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
Loading…
Reference in New Issue