[X86] Support fp128 and/or/xor/load/store with VEX and EVEX encoded instructions.

Move all the patterns to X86InstrVecCompiler.td so we can keep SSE/AVX/AVX512 all in one place.

To save some patterns we'll use an existing DAG combine to convert f128 fand/for/fxor to integer when sse2 is enabled. This allows us to reuse all the existing patterns for v2i64.

I believe this now makes SHA instructions the only case where VEX/EVEX and legacy encoded instructions could be generated simultaneously.

llvm-svn: 338821
parent 58d837d347
commit e902b7d0b0
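A minimal IR sketch of the behavior being changed (my addition, not part of this commit; it mirrors the TestFABS_LD case in the test diff below): fabs on fp128 is custom-lowered to an X86 FAND node with a sign-clearing mask, the combine changed below rewrites it as a v2i64 and when SSE2 is available, and llc then emits andps under plain SSE or the VEX/EVEX vandps forms under -mattr=+avx2/+avx512vl, matching the SSE/AVX check lines in the updated test.

; Sketch only: run through llc with the -mattr settings from the RUN lines below.
declare fp128 @llvm.fabs.f128(fp128)

define fp128 @fabs_f128(fp128 %x) {
entry:
  %r = call fp128 @llvm.fabs.f128(fp128 %x)   ; becomes X86 FAND -> v2i64 and -> andps/vandps
  ret fp128 %r
}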
llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -613,7 +613,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // Long double always uses X87, except f128 in MMX.
     if (UseX87) {
       if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
-        addRegisterClass(MVT::f128, &X86::VR128RegClass);
+        addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
         ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
         setOperationAction(ISD::FABS , MVT::f128, Custom);
         setOperationAction(ISD::FNEG , MVT::f128, Custom);
@@ -36981,7 +36982,7 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
   MVT VT = N->getSimpleValueType(0);
   // If we have integer vector types available, use the integer opcodes.
-  if (VT.isVector() && Subtarget.hasSSE2()) {
+  if ((VT.isVector() || VT == MVT::f128) && Subtarget.hasSSE2()) {
     SDLoc dl(N);

     MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
llvm/lib/Target/X86/X86InstrSSE.td
@@ -8131,51 +8131,6 @@ let Predicates = [UseAVX2] in {
   }
 }

-//===----------------------------------------------------------------------===//
-// Extra selection patterns for f128, f128mem
-
-// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-def : Pat<(store (f128 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-
-def : Pat<(alignedloadf128 addr:$src),
-          (COPY_TO_REGCLASS (MOVAPSrm addr:$src), VR128)>;
-def : Pat<(loadf128 addr:$src),
-          (COPY_TO_REGCLASS (MOVUPSrm addr:$src), VR128)>;
-
-// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
-          (COPY_TO_REGCLASS
-           (ANDPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
-           VR128)>;
-
-def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
-          (COPY_TO_REGCLASS
-           (ANDPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
-                    (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
-          (COPY_TO_REGCLASS
-           (ORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
-           VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
-          (COPY_TO_REGCLASS
-           (ORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
-                   (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
-          (COPY_TO_REGCLASS
-           (XORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
-           VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
-          (COPY_TO_REGCLASS
-           (XORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
-                    (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
 //===----------------------------------------------------------------------===//
 // GFNI instructions
 //===----------------------------------------------------------------------===//
llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -49,6 +49,19 @@ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
 def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
 def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;

+def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
+
 // Bitcasts between 256-bit vector types. Return the original type since
 // no instruction is needed for the conversion
 def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
@@ -509,3 +522,68 @@ let Predicates = [HasBWI, HasVLX] in {
             (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
                         (i8 60)), (i8 60))>;
 }
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+let Predicates = [NoAVX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+          (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+          (MOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+          (MOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+          (MOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+          (VMOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+          (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+          (VMOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+          (VMOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
+          (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+def : Pat<(store (f128 VR128X:$src), addr:$dst),
+          (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+          (VMOVAPSZ128rm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+          (VMOVUPSZ128rm addr:$src)>;
+}
+
+// With SSE2 the DAG combiner converts fp logic ops to integer logic ops to
+// reduce patterns.
+let Predicates = [UseSSE1] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
+          (ANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+          (ANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
+          (ORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+          (ORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
+          (XORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+          (XORPSrr VR128:$src1, VR128:$src2)>;
+}
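As a companion sketch (also my addition, not from the commit), an aligned fp128 round trip exercises the new alignedloadf128/alignedstore patterns above: expect movaps under NoAVX, vmovaps under HasAVX, and the EVEX VMOVAPSZ128 forms under HasVLX.

; Sketch only: compile with the RUN-line -mattr variants below (+mmx, +mmx,avx2, +mmx,avx512vl).
define void @copy_f128(fp128* %dst, fp128* %src) {
entry:
  %v = load fp128, fp128* %src, align 16   ; alignedloadf128 -> (V)MOVAPSrm / VMOVAPSZ128rm
  store fp128 %v, fp128* %dst, align 16    ; alignedstore    -> (V)MOVAPSmr / VMOVAPSZ128mr
  ret void
}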
llvm/test/CodeGen/X86/fp128-i128.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX

 ; These tests were generated from simplified libm C code.
 ; When compiled for the x86_64-linux-android target,
@@ -42,19 +46,33 @@
 ;   foo(w);
 ; }
 define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
-; CHECK-LABEL: TestUnionLD1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
-; CHECK-NEXT:    andq %rdi, %rcx
-; CHECK-NEXT:    movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
-; CHECK-NEXT:    andq -{{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT:    orq %rcx, %rdx
-; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT:    jmp foo # TAILCALL
+; SSE-LABEL: TestUnionLD1:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
+; SSE-NEXT:    andq %rdi, %rcx
+; SSE-NEXT:    movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
+; SSE-NEXT:    andq -{{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT:    orq %rcx, %rdx
+; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    jmp foo # TAILCALL
+;
+; AVX-LABEL: TestUnionLD1:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
+; AVX-NEXT:    andq %rdi, %rcx
+; AVX-NEXT:    movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
+; AVX-NEXT:    andq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT:    jmp foo # TAILCALL
 entry:
   %0 = bitcast fp128 %s to i128
   %1 = zext i64 %n to i128
@@ -77,14 +95,23 @@ entry:
 ;   return w;
 ; }
 define fp128 @TestUnionLD2(fp128 %s) #0 {
-; CHECK-LABEL: TestUnionLD2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestUnionLD2:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestUnionLD2:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %s to i128
   %bf.clear = and i128 %0, -18446744073709551616
@@ -101,25 +128,45 @@ entry:
 ;   return (z.e < 0.1L) ? 1.0L : 2.0L;
 ; }
 define fp128 @TestI128_1(fp128 %x) #0 {
-; CHECK-LABEL: TestI128_1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; CHECK-NEXT:    andq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rcx, (%rsp)
-; CHECK-NEXT:    movaps (%rsp), %xmm0
-; CHECK-NEXT:    movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    callq __lttf2
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    sets %cl
-; CHECK-NEXT:    shlq $4, %rcx
-; CHECK-NEXT:    movaps {{\.LCPI.*}}(%rcx), %xmm0
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestI128_1:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    subq $40, %rsp
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT:    andq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rcx, (%rsp)
+; SSE-NEXT:    movaps (%rsp), %xmm0
+; SSE-NEXT:    movaps {{.*}}(%rip), %xmm1
+; SSE-NEXT:    callq __lttf2
+; SSE-NEXT:    xorl %ecx, %ecx
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    sets %cl
+; SSE-NEXT:    shlq $4, %rcx
+; SSE-NEXT:    movaps {{\.LCPI.*}}(%rcx), %xmm0
+; SSE-NEXT:    addq $40, %rsp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestI128_1:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT:    andq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq %rcx, (%rsp)
+; AVX-NEXT:    vmovaps (%rsp), %xmm0
+; AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm1
+; AVX-NEXT:    callq __lttf2
+; AVX-NEXT:    xorl %ecx, %ecx
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    sets %cl
+; AVX-NEXT:    shlq $4, %rcx
+; AVX-NEXT:    vmovaps {{\.LCPI.*}}(%rcx), %xmm0
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %bf.clear = and i128 %0, 170141183460469231731687303715884105727
@@ -139,15 +186,25 @@ entry:
 ;   return (hx & 0x8000) == 0 ? x : y;
 ; }
 define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
-; CHECK-LABEL: TestI128_2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    jns .LBB3_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:  .LBB3_2: # %entry
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestI128_2:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    jns .LBB3_2
+; SSE-NEXT:  # %bb.1: # %entry
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:  .LBB3_2: # %entry
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestI128_2:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    jns .LBB3_2
+; AVX-NEXT:  # %bb.1: # %entry
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:  .LBB3_2: # %entry
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %cmp = icmp sgt i128 %0, -1
@@ -167,32 +224,59 @@ entry:
 ;   return (u.e);
 ; }
 define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 {
-; CHECK-LABEL: TestI128_3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $56, %rsp
-; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
-; CHECK-NEXT:    testq %rcx, %rax
-; CHECK-NEXT:    je .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    jmp .LBB4_3
-; CHECK-NEXT:  .LBB4_2: # %if.then
-; CHECK-NEXT:    movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    callq __multf3
-; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
-; CHECK-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
-; CHECK-NEXT:    orq %rdx, %rax
-; CHECK-NEXT:  .LBB4_3: # %if.end
-; CHECK-NEXT:    movq %rcx, (%rsp)
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movaps (%rsp), %xmm0
-; CHECK-NEXT:    addq $56, %rsp
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestI128_3:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    subq $56, %rsp
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
+; SSE-NEXT:    testq %rcx, %rax
+; SSE-NEXT:    je .LBB4_2
+; SSE-NEXT:  # %bb.1:
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT:    jmp .LBB4_3
+; SSE-NEXT:  .LBB4_2: # %if.then
+; SSE-NEXT:    movaps {{.*}}(%rip), %xmm1
+; SSE-NEXT:    callq __multf3
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
+; SSE-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
+; SSE-NEXT:    orq %rdx, %rax
+; SSE-NEXT:  .LBB4_3: # %if.end
+; SSE-NEXT:    movq %rcx, (%rsp)
+; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps (%rsp), %xmm0
+; SSE-NEXT:    addq $56, %rsp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestI128_3:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    subq $56, %rsp
+; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
+; AVX-NEXT:    testq %rcx, %rax
+; AVX-NEXT:    je .LBB4_2
+; AVX-NEXT:  # %bb.1:
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT:    jmp .LBB4_3
+; AVX-NEXT:  .LBB4_2: # %if.then
+; AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm1
+; AVX-NEXT:    callq __multf3
+; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
+; AVX-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
+; AVX-NEXT:    orq %rdx, %rax
+; AVX-NEXT:  .LBB4_3: # %if.end
+; AVX-NEXT:    movq %rcx, (%rsp)
+; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovaps (%rsp), %xmm0
+; AVX-NEXT:    addq $56, %rsp
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %bf.cast = and i128 %0, 170135991163610696904058773219554885632
@@ -223,18 +307,31 @@ if.end:                                           ; preds = %if.then, %entry
 ;   return x + df;
 ; }
 define fp128 @TestI128_4(fp128 %x) #0 {
-; CHECK-LABEL: TestI128_4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    movaps %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq $0, (%rsp)
-; CHECK-NEXT:    movaps (%rsp), %xmm0
-; CHECK-NEXT:    callq __addtf3
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestI128_4:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    subq $40, %rsp
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq $0, (%rsp)
+; SSE-NEXT:    movaps (%rsp), %xmm0
+; SSE-NEXT:    callq __addtf3
+; SSE-NEXT:    addq $40, %rsp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestI128_4:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    vmovaps %xmm0, %xmm1
+; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq $0, (%rsp)
+; AVX-NEXT:    vmovaps (%rsp), %xmm0
+; AVX-NEXT:    callq __addtf3
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %bf.clear = and i128 %0, -18446744073709551616
@@ -271,18 +368,31 @@ entry:
 }

 define fp128 @acosl(fp128 %x) #0 {
-; CHECK-LABEL: acosl:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    movaps %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq $0, (%rsp)
-; CHECK-NEXT:    movaps (%rsp), %xmm0
-; CHECK-NEXT:    callq __addtf3
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
+; SSE-LABEL: acosl:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    subq $40, %rsp
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq $0, (%rsp)
+; SSE-NEXT:    movaps (%rsp), %xmm0
+; SSE-NEXT:    callq __addtf3
+; SSE-NEXT:    addq $40, %rsp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: acosl:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    vmovaps %xmm0, %xmm1
+; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT:    movq $0, (%rsp)
+; AVX-NEXT:    vmovaps (%rsp), %xmm0
+; AVX-NEXT:    callq __addtf3
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %bf.clear = and i128 %0, -18446744073709551616
@@ -293,15 +403,25 @@ entry:

 ; Compare i128 values and check i128 constants.
 define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
-; CHECK-LABEL: TestComp:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    jns .LBB8_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:  .LBB8_2: # %entry
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestComp:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    jns .LBB8_2
+; SSE-NEXT:  # %bb.1: # %entry
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:  .LBB8_2: # %entry
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestComp:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    jns .LBB8_2
+; AVX-NEXT:  # %bb.1: # %entry
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:  .LBB8_2: # %entry
+; AVX-NEXT:    retq
 entry:
   %0 = bitcast fp128 %x to i128
   %cmp = icmp sgt i128 %0, -1
@@ -313,10 +433,15 @@ declare void @foo(fp128) #1

 ; Test logical operations on fp128 values.
 define fp128 @TestFABS_LD(fp128 %x) #0 {
-; CHECK-LABEL: TestFABS_LD:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestFABS_LD:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestFABS_LD:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
 entry:
   %call = tail call fp128 @fabsl(fp128 %x) #2
   ret fp128 %call
@@ -328,43 +453,79 @@ declare fp128 @copysignl(fp128, fp128) #1

 ; Test more complicated logical operations generated from copysignl.
 define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 {
-; CHECK-LABEL: TestCopySign:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    movq %rdi, %rbx
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    callq __gttf2
-; CHECK-NEXT:    movl %eax, %ebp
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movaps %xmm0, %xmm1
-; CHECK-NEXT:    callq __subtf3
-; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jle .LBB10_1
-; CHECK-NEXT:  # %bb.2: # %if.then
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movaps %xmm0, %xmm1
-; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movaps %xmm1, %xmm2
-; CHECK-NEXT:    jmp .LBB10_3
-; CHECK-NEXT:  .LBB10_1:
-; CHECK-NEXT:    movaps (%rsp), %xmm2 # 16-byte Reload
-; CHECK-NEXT:  .LBB10_3: # %cleanup
-; CHECK-NEXT:    movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    orps %xmm1, %xmm0
-; CHECK-NEXT:    movaps %xmm2, (%rbx)
-; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
+; SSE-LABEL: TestCopySign:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    pushq %rbp
+; SSE-NEXT:    pushq %rbx
+; SSE-NEXT:    subq $40, %rsp
+; SSE-NEXT:    movq %rdi, %rbx
+; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    callq __gttf2
+; SSE-NEXT:    movl %eax, %ebp
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    callq __subtf3
+; SSE-NEXT:    testl %ebp, %ebp
+; SSE-NEXT:    jle .LBB10_1
+; SSE-NEXT:  # %bb.2: # %if.then
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    jmp .LBB10_3
+; SSE-NEXT:  .LBB10_1:
+; SSE-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; SSE-NEXT:  .LBB10_3: # %cleanup
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm2
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE-NEXT:    orps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm1, (%rbx)
+; SSE-NEXT:    movaps %xmm0, 16(%rbx)
+; SSE-NEXT:    movq %rbx, %rax
+; SSE-NEXT:    addq $40, %rsp
+; SSE-NEXT:    popq %rbx
+; SSE-NEXT:    popq %rbp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: TestCopySign:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    pushq %rbp
+; AVX-NEXT:    pushq %rbx
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    movq %rdi, %rbx
+; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm1
+; AVX-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT:    callq __gttf2
+; AVX-NEXT:    movl %eax, %ebp
+; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT:    vmovaps %xmm0, %xmm1
+; AVX-NEXT:    callq __subtf3
+; AVX-NEXT:    testl %ebp, %ebp
+; AVX-NEXT:    jle .LBB10_1
+; AVX-NEXT:  # %bb.2: # %if.then
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX-NEXT:    vmovaps %xmm1, %xmm2
+; AVX-NEXT:    jmp .LBB10_3
+; AVX-NEXT:  .LBB10_1:
+; AVX-NEXT:    vmovaps (%rsp), %xmm2 # 16-byte Reload
+; AVX-NEXT:  .LBB10_3: # %cleanup
+; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %xmm2, (%rbx)
+; AVX-NEXT:    vmovaps %xmm0, 16(%rbx)
+; AVX-NEXT:    movq %rbx, %rax
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    popq %rbx
+; AVX-NEXT:    popq %rbp
+; AVX-NEXT:    retq
 entry:
   %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0
   %z.real = load fp128, fp128* %z.realp, align 16