Add strict floating-point (constrained FP) support for round operations

Differential Revision: https://reviews.llvm.org/D72026
This commit is contained in:
Liu, Chen3 2019-12-31 11:38:17 +08:00
parent d2bb8c16e7
commit 8af492ade1
10 changed files with 956 additions and 41 deletions

View File

@ -897,27 +897,50 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
case ISD::FCEIL:
case ISD::STRICT_FCEIL:
case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
case ISD::FNEARBYINT:
case ISD::FRINT: {
case ISD::STRICT_FNEARBYINT:
case ISD::FRINT:
case ISD::STRICT_FRINT: {
// Replace fp rounding with their X86 specific equivalent so we don't
// need 2 sets of patterns.
unsigned Imm;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::STRICT_FCEIL:
case ISD::FCEIL: Imm = 0xA; break;
case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: Imm = 0x9; break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: Imm = 0xB; break;
case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: Imm = 0xC; break;
case ISD::STRICT_FRINT:
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
SDValue Res = CurDAG->getNode(
X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
bool IsStrict = N->isStrictFPOpcode();
SDValue Res;
if (IsStrict)
Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
{N->getValueType(0), MVT::Other},
{N->getOperand(0), N->getOperand(1),
CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
else
Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
N->getOperand(0),
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
if (IsStrict) {
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
SDValue To[] = {Res.getValue(0), Res.getValue(1)};
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
} else
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
CurDAG->DeleteNode(N);
continue;

View File

@ -1068,11 +1068,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
@ -1144,14 +1149,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
@ -1503,11 +1513,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
}
@ -29650,6 +29665,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE";
case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";

View File

@ -424,7 +424,7 @@ namespace llvm {
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
// Also used by the legacy (V)ROUND intrinsics where we mask out the
// scaling part of the immediate.
VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, STRICT_VRNDSCALE,
// Tests Types Of a FP Values for packed types.
VFPCLASS,
// Tests Types Of a FP Values for scalar types.

View File

@ -9019,13 +9019,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX512] in {
def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
@ -10290,7 +10290,7 @@ defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56
X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,

View File

@ -466,6 +466,12 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
[SDNPHasChain]>;
def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_VRndScale node:$src1, node:$src2),
(X86VRndScale node:$src1, node:$src2)]>;
def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;

View File

@ -5540,19 +5540,19 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
@ -5565,25 +5565,25 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseAVX] in {
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
@ -5592,16 +5592,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(ROUNDSSr FR32:$src1, timm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(ROUNDSDr FR64:$src1, timm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(ROUNDSSm addr:$src1, timm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(ROUNDSDm addr:$src1, timm:$src2)>;
}

View File

@ -0,0 +1,474 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
define float @fceil32(float %f) #0 {
; SSE41-X86-LABEL: fceil32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceil32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceil32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceil32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.ceil.f32(
float %f, metadata !"fpexcept.strict")
ret float %res
}
define double @fceilf64(double %f) #0 {
; SSE41-X86-LABEL: fceilf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceilf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceilf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceilf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.ceil.f64(
double %f, metadata !"fpexcept.strict")
ret double %res
}
define float @ffloor32(float %f) #0 {
; SSE41-X86-LABEL: ffloor32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloor32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloor32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloor32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.floor.f32(
float %f, metadata !"fpexcept.strict")
ret float %res
}
define double @ffloorf64(double %f) #0 {
; SSE41-X86-LABEL: ffloorf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloorf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloorf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloorf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.floor.f64(
double %f, metadata !"fpexcept.strict")
ret double %res
}
define float @ftrunc32(float %f) #0 {
; SSE41-X86-LABEL: ftrunc32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftrunc32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftrunc32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftrunc32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.trunc.f32(
float %f, metadata !"fpexcept.strict")
ret float %res
}
define double @ftruncf64(double %f) #0 {
; SSE41-X86-LABEL: ftruncf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftruncf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftruncf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftruncf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.trunc.f64(
double %f, metadata !"fpexcept.strict")
ret double %res
}
define float @frint32(float %f) #0 {
; SSE41-X86-LABEL: frint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.rint.f32(
float %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret float %res
}
define double @frintf64(double %f) #0 {
; SSE41-X86-LABEL: frintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.rint.f64(
double %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret double %res
}
define float @fnearbyint32(float %f) #0 {
; SSE41-X86-LABEL: fnearbyint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret float %res
}
define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-LABEL: fnearbyintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret double %res
}
attributes #0 = { strictfp }

View File

@ -18,6 +18,16 @@ declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
declare <8 x float> @llvm.experimental.constrained.rint.v8f32(<8 x float>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: f1:
@ -178,4 +188,111 @@ define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
ret <4 x double> %res
}
define <8 x float> @fceilv8f32(<8 x float> %f) #0 {
; CHECK-LABEL: fceilv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $10, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.ceil.v8f32(
<8 x float> %f, metadata !"fpexcept.strict")
ret <8 x float> %res
}
define <4 x double> @fceilv4f64(<4 x double> %f) #0 {
; CHECK-LABEL: fceilv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(
<4 x double> %f, metadata !"fpexcept.strict")
ret <4 x double> %res
}
define <8 x float> @ffloorv8f32(<8 x float> %f) #0 {
; CHECK-LABEL: ffloorv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $9, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.floor.v8f32(
<8 x float> %f, metadata !"fpexcept.strict")
ret <8 x float> %res
}
define <4 x double> @ffloorv4f64(<4 x double> %f) #0 {
; CHECK-LABEL: ffloorv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.floor.v4f64(
<4 x double> %f, metadata !"fpexcept.strict")
ret <4 x double> %res
}
define <8 x float> @ftruncv8f32(<8 x float> %f) #0 {
; CHECK-LABEL: ftruncv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.trunc.v8f32(
<8 x float> %f, metadata !"fpexcept.strict")
ret <8 x float> %res
}
define <4 x double> @ftruncv4f64(<4 x double> %f) #0 {
; CHECK-LABEL: ftruncv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(
<4 x double> %f, metadata !"fpexcept.strict")
ret <4 x double> %res
}
define <8 x float> @frintv8f32(<8 x float> %f) #0 {
; CHECK-LABEL: frintv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $4, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.rint.v8f32(
<8 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %res
}
define <4 x double> @frintv4f64(<4 x double> %f) #0 {
; CHECK-LABEL: frintv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
<4 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %res
}
define <8 x float> @fnearbyintv8f32(<8 x float> %f) #0 {
; CHECK-LABEL: fnearbyintv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $12, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(
<8 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %res
}
define <4 x double> @fnearbyintv4f64(<4 x double> %f) #0 {
; CHECK-LABEL: fnearbyintv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
<4 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %res
}
attributes #0 = { strictfp }

View File

@ -16,6 +16,17 @@ declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float
declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata)
declare <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double>, metadata)
declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata)
declare <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double>, metadata)
declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata)
declare <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double>, metadata)
declare <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata)
define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
; CHECK-LABEL: f1:
@ -175,4 +186,98 @@ define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
ret <8 x double> %res
}
; 512-bit strict ceil: there is no 512-bit vroundps, so STRICT_FCEIL must
; select vrndscaleps with imm 10 (0xA: round-toward-+inf, inexact suppressed).
define <16 x float> @strict_vector_fceil_v16f32(<16 x float> %f) #0 {
; CHECK-LABEL: strict_vector_fceil_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
ret <16 x float> %res
}
; 512-bit strict ceil, double element type: vrndscalepd $10.
define <8 x double> @strict_vector_fceil_v8f64(<8 x double> %f) #0 {
; CHECK-LABEL: strict_vector_fceil_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
ret <8 x double> %res
}
; 512-bit strict floor: vrndscaleps $9 (0x9: round-toward--inf, inexact
; suppressed), matching FFLOOR->0x9 in PreprocessISelDAG.
define <16 x float> @strict_vector_ffloor_v16f32(<16 x float> %f) #0 {
; CHECK-LABEL: strict_vector_ffloor_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
ret <16 x float> %res
}
; 512-bit strict floor, double element type: vrndscalepd $9.
define <8 x double> @strict_vector_ffloor_v8f64(<8 x double> %f) #0 {
; CHECK-LABEL: strict_vector_ffloor_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
ret <8 x double> %res
}
; 512-bit strict trunc: vrndscaleps $11 (0xB: round-toward-zero, inexact
; suppressed).
define <16 x float> @strict_vector_ftrunc_v16f32(<16 x float> %f) #0 {
; CHECK-LABEL: strict_vector_ftrunc_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %f, metadata !"fpexcept.strict")
ret <16 x float> %res
}
; 512-bit strict trunc, double element type: vrndscalepd $11.
define <8 x double> @strict_vector_ftrunc_v8f64(<8 x double> %f) #0 {
; CHECK-LABEL: strict_vector_ftrunc_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %f, metadata !"fpexcept.strict")
ret <8 x double> %res
}
; 512-bit strict rint: vrndscaleps $4 (0x4: current MXCSR rounding mode,
; inexact exception NOT suppressed — the rint contract).
define <16 x float> @strict_vector_frint_v16f32(<16 x float> %f) #0 {
; CHECK-LABEL: strict_vector_frint_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %res
}
; 512-bit strict rint, double element type: vrndscalepd $4.
define <8 x double> @strict_vector_frint_v8f64(<8 x double> %f) #0 {
; CHECK-LABEL: strict_vector_frint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %res
}
; 512-bit strict nearbyint: vrndscaleps $12 (0xC: current rounding mode with
; inexact suppressed — differs from rint's imm 4 only in the suppression bit).
define <16 x float> @strict_vector_fnearbyint_v16f32(<16 x float> %f) #0 {
; CHECK-LABEL: strict_vector_fnearbyint_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %res
}
; 512-bit strict nearbyint, double element type: vrndscalepd $12.
define <8 x double> @strict_vector_fnearbyint_v8f64(<8 x double> %f) #0 {
; CHECK-LABEL: strict_vector_fnearbyint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %res
}
attributes #0 = { strictfp }

View File

@ -0,0 +1,174 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
; 128-bit strict ceil: roundps (SSE4.1) / vroundps (AVX) with imm 10
; (0xA: round-toward-+inf, inexact suppressed), per the FCEIL->0xA mapping
; in X86DAGToDAGISel::PreprocessISelDAG.
define <4 x float> @fceilv4f32(<4 x float> %f) #0 {
; SSE41-LABEL: fceilv4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $10, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: fceilv4f32:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $10, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
<4 x float> %f, metadata !"fpexcept.strict")
ret <4 x float> %res
}
; 128-bit strict ceil, double element type: roundpd/vroundpd $10.
define <2 x double> @fceilv2f64(<2 x double> %f) #0 {
; SSE41-LABEL: fceilv2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $10, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: fceilv2f64:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
<2 x double> %f, metadata !"fpexcept.strict")
ret <2 x double> %res
}
; 128-bit strict floor: roundps/vroundps $9 (0x9: round-toward--inf, inexact
; suppressed).
define <4 x float> @ffloorv4f32(<4 x float> %f) #0 {
; SSE41-LABEL: ffloorv4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $9, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: ffloorv4f32:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $9, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
<4 x float> %f, metadata !"fpexcept.strict")
ret <4 x float> %res
}
; 128-bit strict floor, double element type: roundpd/vroundpd $9.
define <2 x double> @ffloorv2f64(<2 x double> %f) #0 {
; SSE41-LABEL: ffloorv2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $9, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: ffloorv2f64:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $9, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
<2 x double> %f, metadata !"fpexcept.strict")
ret <2 x double> %res
}
; 128-bit strict trunc: roundps/vroundps $11 (0xB: round-toward-zero, inexact
; suppressed).
define <4 x float> @ftruncv4f32(<4 x float> %f) #0 {
; SSE41-LABEL: ftruncv4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: ftruncv4f32:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $11, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
<4 x float> %f, metadata !"fpexcept.strict")
ret <4 x float> %res
}
; 128-bit strict trunc, double element type: roundpd/vroundpd $11.
define <2 x double> @ftruncv2f64(<2 x double> %f) #0 {
; SSE41-LABEL: ftruncv2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: ftruncv2f64:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $11, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
<2 x double> %f, metadata !"fpexcept.strict")
ret <2 x double> %res
}
; 128-bit strict rint: roundps/vroundps $4 (0x4: current MXCSR rounding mode,
; inexact exception NOT suppressed).
define <4 x float> @frintv4f32(<4 x float> %f) #0 {
; SSE41-LABEL: frintv4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $4, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: frintv4f32:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $4, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
<4 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x float> %res
}
; 128-bit strict rint, double element type: roundpd/vroundpd $4.
define <2 x double> @frintv2f64(<2 x double> %f) #0 {
; SSE41-LABEL: frintv2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $4, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: frintv2f64:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
<2 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %res
}
; 128-bit strict nearbyint: roundps/vroundps $12 (0xC: current rounding mode
; with inexact suppressed — distinguishes nearbyint from rint's imm 4).
define <4 x float> @fnearbyintv4f32(<4 x float> %f) #0 {
; SSE41-LABEL: fnearbyintv4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $12, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: fnearbyintv4f32:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $12, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
<4 x float> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x float> %res
}
; 128-bit strict nearbyint, double element type: roundpd/vroundpd $12.
define <2 x double> @fnearbyintv2f64(<2 x double> %f) #0 {
; SSE41-LABEL: fnearbyintv2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $12, %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: fnearbyintv2f64:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
<2 x double> %f,
metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %res
}
attributes #0 = { strictfp }