forked from OSchip/llvm-project
[X86][FastISel] Support EVEX version of sqrt.
llvm-svn: 336939
This commit is contained in:
parent
1880a3f0d8
commit
57c4585bab
|
@ -2799,17 +2799,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
|
|||
// Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
|
||||
// is not generated by FastISel yet.
|
||||
// FIXME: Update this code once tablegen can handle it.
|
||||
-static const uint16_t SqrtOpc[2][2] = {
|
||||
-{X86::SQRTSSr, X86::VSQRTSSr},
|
||||
-{X86::SQRTSDr, X86::VSQRTSDr}
|
||||
+static const uint16_t SqrtOpc[3][2] = {
|
||||
+{ X86::SQRTSSr, X86::SQRTSDr },
|
||||
+{ X86::VSQRTSSr, X86::VSQRTSDr },
|
||||
+{ X86::VSQRTSSZr, X86::VSQRTSDZr },
|
||||
};
|
||||
-bool HasAVX = Subtarget->hasAVX();
|
||||
+unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
|
||||
+Subtarget->hasAVX() ? 1 :
|
||||
+0;
|
||||
unsigned Opc;
|
||||
-const TargetRegisterClass *RC;
|
||||
switch (VT.SimpleTy) {
|
||||
default: return false;
|
||||
-case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
|
||||
-case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
|
||||
+case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
|
||||
+case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
|
||||
}
|
||||
|
||||
const Value *SrcVal = II->getArgOperand(0);
|
||||
|
@ -2818,8 +2820,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
|
|||
if (SrcReg == 0)
|
||||
return false;
|
||||
|
||||
+const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
|
||||
unsigned ImplicitDefReg = 0;
|
||||
-if (HasAVX) {
|
||||
+if (AVXLevel > 0) {
|
||||
ImplicitDefReg = createResultReg(RC);
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
|
||||
|
|
|
@ -2614,10 +2614,15 @@ define float @test_mm_sqrt_ss_scalar(float %a0) {
|
|||
; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
|
||||
; X64-SSE-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
-; X64-AVX-LABEL: test_mm_sqrt_ss_scalar:
|
||||
-; X64-AVX: # %bb.0:
|
||||
-; X64-AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
|
||||
-; X64-AVX-NEXT: retq # encoding: [0xc3]
|
||||
+; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
|
||||
+; X64-AVX1: # %bb.0:
|
||||
+; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
|
||||
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
|
||||
+;
|
||||
+; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
|
||||
+; X64-AVX512: # %bb.0:
|
||||
+; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
|
||||
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
|
||||
%sqrt = call float @llvm.sqrt.f32(float %a0)
|
||||
ret float %sqrt
|
||||
}
|
||||
|
|
|
@ -4959,10 +4959,15 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
|
|||
; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
|
||||
; X64-SSE-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
-; X64-AVX-LABEL: test_mm_sqrt_sd_scalar:
|
||||
-; X64-AVX: # %bb.0:
|
||||
-; X64-AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
|
||||
-; X64-AVX-NEXT: retq # encoding: [0xc3]
|
||||
+; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
|
||||
+; X64-AVX1: # %bb.0:
|
||||
+; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
|
||||
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
|
||||
+;
|
||||
+; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
|
||||
+; X64-AVX512: # %bb.0:
|
||||
+; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
|
||||
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
|
||||
%sqrt = call double @llvm.sqrt.f64(double %a0)
|
||||
ret double %sqrt
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue