forked from OSchip/llvm-project
[X86] Clean up FMA tests a little bit. NFC.
Reformat, isolate the 213->231 transform, actually pass --check-prefix=CHECK, and deduplicate the FMA intrinsic tests (FMA3 in AMD-land). llvm-svn: 240615
This commit is contained in:
parent
42aa00b34b
commit
cee6d1bb3c
|
@ -0,0 +1,204 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmaddsubpd_loop:
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <4 x double> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmsubaddpd_loop:
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <4 x double> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmaddpd_loop:
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <4 x double> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmsubpd_loop:
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <4 x double> %acc
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmaddsubps_loop:
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <8 x float> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmsubaddps_loop:
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <8 x float> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmaddps_loop:
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <8 x float> %acc
}
|
||||
|
||||
; The addend %c is loop-carried: it enters through a phi and each iteration's
; intrinsic result is fed back as the third operand of the next call. The
; expected selection is the 231 form of the instruction on ymm registers.
; CHECK-LABEL: fmsubps_loop:
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
start:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %start ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %start ], [ %idx.next, %loop.latch ]
  %keep.going = icmp slt i32 %idx, %iter
  br i1 %keep.going, label %loop.body, label %exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

exit:
  ret <8 x float> %acc
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
|
@ -0,0 +1,493 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
|
||||
|
||||
; VFMADD
|
||||
; Selection test for llvm.x86.fma.vfmadd.ss: FMA3 runs expect vfmadd213ss on
; xmm registers, FMA4 runs the four-operand vfmaddss, each followed by retq.
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmadd.sd: FMA3 runs expect vfmadd213sd on
; xmm registers, FMA4 runs the four-operand vfmaddsd, each followed by retq.
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmadd.ps: FMA3 runs expect vfmadd213ps on
; xmm registers, FMA4 runs the four-operand vfmaddps, each followed by retq.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmadd.pd: FMA3 runs expect vfmadd213pd on
; xmm registers, FMA4 runs the four-operand vfmaddpd, each followed by retq.
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmadd.ps.256: FMA3 runs expect vfmadd213ps
; on ymm registers, FMA4 runs the four-operand vfmaddps, each followed by retq.
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmadd.pd.256: FMA3 runs expect vfmadd213pd
; on ymm registers, FMA4 runs the four-operand vfmaddpd, each followed by retq.
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUB
|
||||
; Selection test for llvm.x86.fma.vfmsub.ss: FMA3 runs expect vfmsub213ss on
; xmm registers, FMA4 runs the four-operand vfmsubss, each followed by retq.
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsub.sd: FMA3 runs expect vfmsub213sd on
; xmm registers, FMA4 runs the four-operand vfmsubsd, each followed by retq.
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsub.ps: FMA3 runs expect vfmsub213ps on
; xmm registers, FMA4 runs the four-operand vfmsubps, each followed by retq.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsub.pd: FMA3 runs expect vfmsub213pd on
; xmm registers, FMA4 runs the four-operand vfmsubpd, each followed by retq.
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsub.ps.256: FMA3 runs expect vfmsub213ps
; on ymm registers, FMA4 runs the four-operand vfmsubps, each followed by retq.
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsub.pd.256: FMA3 runs expect vfmsub213pd
; on ymm registers, FMA4 runs the four-operand vfmsubpd, each followed by retq.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMADD
|
||||
; Selection test for llvm.x86.fma.vfnmadd.ss: FMA3 runs expect vfnmadd213ss on
; xmm registers, FMA4 runs the four-operand vfnmaddss, each followed by retq.
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmadd.sd: FMA3 runs expect vfnmadd213sd on
; xmm registers, FMA4 runs the four-operand vfnmaddsd, each followed by retq.
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmadd.ps: FMA3 runs expect vfnmadd213ps on
; xmm registers, FMA4 runs the four-operand vfnmaddps, each followed by retq.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmadd.pd: FMA3 runs expect vfnmadd213pd on
; xmm registers, FMA4 runs the four-operand vfnmaddpd, each followed by retq.
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmadd.ps.256: FMA3 runs expect vfnmadd213ps
; on ymm registers, FMA4 runs the four-operand vfnmaddps, each followed by retq.
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmadd.pd.256: FMA3 runs expect vfnmadd213pd
; on ymm registers, FMA4 runs the four-operand vfnmaddpd, each followed by retq.
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMSUB
|
||||
; Selection test for llvm.x86.fma.vfnmsub.ss: FMA3 runs expect vfnmsub213ss on
; xmm registers, FMA4 runs the four-operand vfnmsubss, each followed by retq.
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmsub.sd: FMA3 runs expect vfnmsub213sd on
; xmm registers, FMA4 runs the four-operand vfnmsubsd, each followed by retq.
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmsub.ps: FMA3 runs expect vfnmsub213ps on
; xmm registers, FMA4 runs the four-operand vfnmsubps, each followed by retq.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmsub.pd: FMA3 runs expect vfnmsub213pd on
; xmm registers, FMA4 runs the four-operand vfnmsubpd, each followed by retq.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmsub.ps.256: FMA3 runs expect vfnmsub213ps
; on ymm registers, FMA4 runs the four-operand vfnmsubps, each followed by retq.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfnmsub.pd.256: FMA3 runs expect vfnmsub213pd
; on ymm registers, FMA4 runs the four-operand vfnmsubpd, each followed by retq.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMADDSUB
|
||||
; Selection test for llvm.x86.fma.vfmaddsub.ps: FMA3 runs expect vfmaddsub213ps
; on xmm registers, FMA4 runs the four-operand vfmaddsubps, each followed by retq.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmaddsub.pd: FMA3 runs expect vfmaddsub213pd
; on xmm registers, FMA4 runs the four-operand vfmaddsubpd, each followed by retq.
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmaddsub.ps.256: FMA3 runs expect
; vfmaddsub213ps on ymm registers, FMA4 runs the four-operand vfmaddsubps,
; each followed by retq.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmaddsub.pd.256: FMA3 runs expect
; vfmaddsub213pd on ymm registers, FMA4 runs the four-operand vfmaddsubpd,
; each followed by retq.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUBADD
|
||||
; Selection test for llvm.x86.fma.vfmsubadd.ps: FMA3 runs expect vfmsubadd213ps
; on xmm registers, FMA4 runs the four-operand vfmsubaddps, each followed by retq.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %fused
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsubadd.pd: FMA3 runs expect vfmsubadd213pd
; on xmm registers, FMA4 runs the four-operand vfmsubaddpd, each followed by retq.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-FMA4-NEXT: retq
  %fused = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %fused
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsubadd.ps.256: FMA3 runs expect
; vfmsubadd213ps on ymm registers, FMA4 runs the four-operand vfmsubaddps,
; each followed by retq.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %fused
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
; Selection test for llvm.x86.fma.vfmsubadd.pd.256: FMA3 runs expect
; vfmsubadd213pd on ymm registers, FMA4 runs the four-operand vfmsubaddpd,
; each followed by retq.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-FMA: # BB#0:
; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
; CHECK-FMA-NEXT: retq
;
; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-FMA4: # BB#0:
; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-FMA4-NEXT: retq
  %fused = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %fused
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -1,278 +0,0 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
|
||||
|
||||
; VFMADD
|
||||
; llvm.x86.fma.vfmadd.ss: the FMA4 checks expect vfmaddss, the FMA3 checks
; expect vfmadd213ss. The label anchor keeps the match inside this function.
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA4: vfmaddss
; CHECK-FMA: vfmadd213ss
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmadd.sd: the FMA4 checks expect vfmaddsd, the FMA3 checks
; expect vfmadd213sd. The label anchor keeps the match inside this function.
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA4: vfmaddsd
; CHECK-FMA: vfmadd213sd
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmadd.ps: the FMA4 checks expect vfmaddps, the FMA3 checks
; expect vfmadd213ps. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA4: vfmaddps
; CHECK-FMA: vfmadd213ps
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmadd.pd: the FMA4 checks expect vfmaddpd, the FMA3 checks
; expect vfmadd213pd. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA4: vfmaddpd
; CHECK-FMA: vfmadd213pd
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmadd.ps.256: the FMA4 checks expect vfmaddps, the FMA3 checks
; expect vfmadd213ps, and a ymm register must follow the mnemonic. The label
; anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA4: vfmaddps
; CHECK-FMA: vfmadd213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmadd.pd.256: the FMA4 checks expect vfmaddpd, the FMA3 checks
; expect vfmadd213pd, and a ymm register must follow the mnemonic. The label
; anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA4: vfmaddpd
; CHECK-FMA: vfmadd213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; VFMSUB
|
||||
; llvm.x86.fma.vfmsub.ss: the FMA4 checks expect vfmsubss, the FMA3 checks
; expect vfmsub213ss. The label anchor keeps the match inside this function.
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA4: vfmsubss
; CHECK-FMA: vfmsub213ss
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsub.sd: the FMA4 checks expect vfmsubsd, the FMA3 checks
; expect vfmsub213sd. The label anchor keeps the match inside this function.
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA4: vfmsubsd
; CHECK-FMA: vfmsub213sd
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsub.ps: the FMA4 checks expect vfmsubps, the FMA3 checks
; expect vfmsub213ps. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA4: vfmsubps
; CHECK-FMA: vfmsub213ps
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsub.pd: the FMA4 checks expect vfmsubpd, the FMA3 checks
; expect vfmsub213pd. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA4: vfmsubpd
; CHECK-FMA: vfmsub213pd
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsub.ps.256: the FMA4 checks expect vfmsubps, the FMA3 checks
; expect vfmsub213ps, and a ymm register must follow the mnemonic. The label
; anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA4: vfmsubps
; CHECK-FMA: vfmsub213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsub.pd.256: the FMA4 checks expect vfmsubpd, the FMA3 checks
; expect vfmsub213pd, and a ymm register must follow the mnemonic. The label
; anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA4: vfmsubpd
; CHECK-FMA: vfmsub213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; VFNMADD
|
||||
; llvm.x86.fma.vfnmadd.ss: the FMA4 checks expect vfnmaddss, the FMA3 checks
; expect vfnmadd213ss. The label anchor keeps the match inside this function.
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA4: vfnmaddss
; CHECK-FMA: vfnmadd213ss
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmadd.sd: the FMA4 checks expect vfnmaddsd, the FMA3 checks
; expect vfnmadd213sd. The label anchor keeps the match inside this function.
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA4: vfnmaddsd
; CHECK-FMA: vfnmadd213sd
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmadd.ps: the FMA4 checks expect vfnmaddps, the FMA3 checks
; expect vfnmadd213ps. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA4: vfnmaddps
; CHECK-FMA: vfnmadd213ps
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmadd.pd: the FMA4 checks expect vfnmaddpd, the FMA3 checks
; expect vfnmadd213pd. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA4: vfnmaddpd
; CHECK-FMA: vfnmadd213pd
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmadd.ps.256: the FMA4 checks expect vfnmaddps, the FMA3
; checks expect vfnmadd213ps, and a ymm register must follow the mnemonic. The
; label anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA4: vfnmaddps
; CHECK-FMA: vfnmadd213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmadd.pd.256: the FMA4 checks expect vfnmaddpd, the FMA3
; checks expect vfnmadd213pd, and a ymm register must follow the mnemonic. The
; label anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA4: vfnmaddpd
; CHECK-FMA: vfnmadd213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; VFNMSUB
|
||||
; llvm.x86.fma.vfnmsub.ss: the FMA4 checks expect vfnmsubss, the FMA3 checks
; expect vfnmsub213ss. The label anchor keeps the match inside this function.
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-FMA4: vfnmsubss
; CHECK-FMA: vfnmsub213ss
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmsub.sd: the FMA4 checks expect vfnmsubsd, the FMA3 checks
; expect vfnmsub213sd. The label anchor keeps the match inside this function.
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA4: vfnmsubsd
; CHECK-FMA: vfnmsub213sd
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmsub.ps: the FMA4 checks expect vfnmsubps, the FMA3 checks
; expect vfnmsub213ps. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-FMA4: vfnmsubps
; CHECK-FMA: vfnmsub213ps
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmsub.pd: the FMA4 checks expect vfnmsubpd, the FMA3 checks
; expect vfnmsub213pd. The label anchor (with its colon) keeps the match from
; drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-FMA4: vfnmsubpd
; CHECK-FMA: vfnmsub213pd
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmsub.ps.256: the FMA4 checks expect vfnmsubps, the FMA3
; checks expect vfnmsub213ps, and a ymm register must follow the mnemonic. The
; label anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-FMA4: vfnmsubps
; CHECK-FMA: vfnmsub213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfnmsub.pd.256: the FMA4 checks expect vfnmsubpd, the FMA3
; checks expect vfnmsub213pd, and a ymm register must follow the mnemonic. The
; label anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-FMA4: vfnmsubpd
; CHECK-FMA: vfnmsub213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; VFMADDSUB
|
||||
; llvm.x86.fma.vfmaddsub.ps: the FMA4 checks expect vfmaddsubps, the FMA3
; checks expect vfmaddsub213ps. The label anchor (with its colon) keeps the
; match from drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-FMA4: vfmaddsubps
; CHECK-FMA: vfmaddsub213ps
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmaddsub.pd: the FMA4 checks expect vfmaddsubpd, the FMA3
; checks expect vfmaddsub213pd. The label anchor (with its colon) keeps the
; match from drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-FMA4: vfmaddsubpd
; CHECK-FMA: vfmaddsub213pd
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmaddsub.ps.256: the FMA4 checks expect vfmaddsubps, the FMA3
; checks expect vfmaddsub213ps, and a ymm register must follow the mnemonic.
; The label anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-FMA4: vfmaddsubps
; CHECK-FMA: vfmaddsub213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmaddsub.pd.256: the FMA4 checks expect vfmaddsubpd, the FMA3
; checks expect vfmaddsub213pd, and a ymm register must follow the mnemonic.
; The label anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-FMA4: vfmaddsubpd
; CHECK-FMA: vfmaddsub213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; VFMSUBADD
|
||||
; llvm.x86.fma.vfmsubadd.ps: the FMA4 checks expect vfmsubaddps, the FMA3
; checks expect vfmsubadd213ps. The label anchor (with its colon) keeps the
; match from drifting into the _256 variant, which emits the same mnemonics.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-FMA4: vfmsubaddps
; CHECK-FMA: vfmsubadd213ps
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsubadd.pd: the FMA4 checks expect vfmsubaddpd, the FMA3
; checks expect vfmsubadd213pd. The label anchor (with its colon) keeps the
; match from drifting into the _256 variant, which emits the same mnemonics.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-FMA4: vfmsubaddpd
; CHECK-FMA: vfmsubadd213pd
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsubadd.ps.256: the FMA4 checks expect vfmsubaddps, the FMA3
; checks expect vfmsubadd213ps, and a ymm register must follow the mnemonic.
; The label anchor keeps all three matches inside this function.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-FMA4: vfmsubaddps
; CHECK-FMA: vfmsubadd213ps
; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; llvm.x86.fma.vfmsubadd.pd.256: the FMA4 checks expect vfmsubaddpd, the FMA3
; checks expect vfmsubadd213pd, and a ymm register must follow the mnemonic.
; The label anchor keeps all three matches inside this function.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-FMA4: vfmsubaddpd
; CHECK-FMA: vfmsubadd213pd
; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
|
@ -1,246 +1,37 @@
|
|||
; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: fmaddsubpd_loop
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
; Test FMA3 variant selection
|
||||
|
||||
; CHECK-LABEL: fma3_select231ssX:
|
||||
; CHECK: vfmadd231ss %xmm
|
||||
define float @fma3_select231ssX(float %x, float %y) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond:
|
||||
%c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp slt i32 %i.0, %iter
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body:
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end:
|
||||
ret <4 x double> %c.addr.0
|
||||
br label %while.body
|
||||
while.body:
|
||||
%acc.01 = phi float [ 0.000000e+00, %entry ], [ %acc, %while.body ]
|
||||
%acc = call float @llvm.fma.f32(float %x, float %y, float %acc.01)
|
||||
%b = fcmp ueq float %acc, 0.0
|
||||
br i1 %b, label %while.body, label %while.end
|
||||
while.end:
|
||||
ret float %acc
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fmsubaddpd_loop
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
; CHECK-LABEL: fma3_select231pdY:
|
||||
; CHECK: vfmadd231pd %ymm
|
||||
define <4 x double> @fma3_select231pdY(<4 x double> %x, <4 x double> %y) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond:
|
||||
%c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp slt i32 %i.0, %iter
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body:
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end:
|
||||
ret <4 x double> %c.addr.0
|
||||
br label %while.body
|
||||
while.body:
|
||||
%acc.04 = phi <4 x double> [ zeroinitializer, %entry ], [ %add, %while.body ]
|
||||
%add = call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %acc.04)
|
||||
%vecext = extractelement <4 x double> %add, i32 0
|
||||
%cmp = fcmp oeq double %vecext, 0.000000e+00
|
||||
br i1 %cmp, label %while.body, label %while.end
|
||||
while.end:
|
||||
ret <4 x double> %add
|
||||
}
|
||||
|
||||
; The addend passed to llvm.x86.fma.vfmadd.pd.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmaddpd_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}
|
||||
|
||||
; The third operand of llvm.x86.fma.vfmsub.pd.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmsubpd_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
|
||||
; The third operand of llvm.x86.fma.vfmaddsub.ps.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmaddsubps_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}
|
||||
|
||||
; The third operand of llvm.x86.fma.vfmsubadd.ps.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmsubaddps_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}
|
||||
|
||||
; The addend passed to llvm.x86.fma.vfmadd.ps.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmaddps_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}
|
||||
|
||||
; The third operand of llvm.x86.fma.vfmsub.ps.256 is the loop-carried phi
; %c.addr.0. The checks expect the 231 form on ymm registers after a loop
; label, then the counter increment, compare, and a jl back to that label.
; The label pattern ends in ':' so it anchors on the symbol definition.
; CHECK-LABEL: fmsubps_loop:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare float @llvm.fma.f32(float, float, float)
|
||||
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
|
|
@ -1,80 +1,46 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
|
||||
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
|
||||
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
|
||||
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
|
||||
|
||||
; CHECK: test_f32
|
||||
; CHECK-LABEL: test_f32:
|
||||
; CHECK-FMA-INST: vfmadd213ss
|
||||
; CHECK-FMA-CALL: fmaf
|
||||
|
||||
define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
|
||||
define float @test_f32(float %a, float %b, float %c) #0 {
|
||||
entry:
|
||||
%call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
||||
%call = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
ret float %call
|
||||
}
|
||||
|
||||
; CHECK: test_f64
|
||||
; CHECK-LABEL: test_f64:
|
||||
; CHECK-FMA-INST: vfmadd213sd
|
||||
; CHECK-FMA-CALL: fma
|
||||
|
||||
define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||
define double @test_f64(double %a, double %b, double %c) #0 {
|
||||
entry:
|
||||
%call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
||||
%call = call double @llvm.fma.f64(double %a, double %b, double %c)
|
||||
ret double %call
|
||||
}
|
||||
|
||||
; CHECK: test_f80
|
||||
; CHECK-LABEL: test_f80:
|
||||
; CHECK: fmal
|
||||
|
||||
define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp {
|
||||
define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) #0 {
|
||||
entry:
|
||||
%call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone
|
||||
%call = call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
|
||||
ret x86_fp80 %call
|
||||
}
|
||||
|
||||
; CHECK: test_f32_cst
|
||||
; CHECK-NOT: fma
|
||||
define float @test_f32_cst() nounwind readnone ssp {
|
||||
; CHECK-LABEL: test_f32_cst:
|
||||
; CHECK-NOT: vfmadd
|
||||
define float @test_f32_cst() #0 {
|
||||
entry:
|
||||
%call = tail call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0) nounwind readnone
|
||||
%call = call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0)
|
||||
ret float %call
|
||||
}
|
||||
|
||||
; Test FMA3 variant selection
; The addend of llvm.fma.f32 is the loop-carried accumulator %acc.01; the
; FMA-INST checks expect the 231 scalar form on an xmm register. The function
; name is matched with a -LABEL directive so the instruction check cannot be
; satisfied by another function's output.
; CHECK-FMA-INST-LABEL: fma3_select231ssX:
; CHECK-FMA-INST: vfmadd231ss %xmm
define float @fma3_select231ssX(float %x, float %y) #0 {
entry:
  br label %while.body
while.body:                                       ; preds = %entry, %while.body
  %acc.01 = phi float [ 0.000000e+00, %entry ], [ %acc, %while.body ]
  %acc = tail call float @llvm.fma.f32(float %x, float %y, float %acc.01) nounwind readnone
  %b = fcmp ueq float %acc, 0.0
  br i1 %b, label %while.body, label %while.end
while.end:                                        ; preds = %while.body
  ret float %acc
}
|
||||
declare float @llvm.fma.f32(float, float, float)
|
||||
declare double @llvm.fma.f64(double, double, double)
|
||||
declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
|
||||
|
||||
; Test FMA3 variant selection
; The addend of llvm.fma.v4f64 is the loop-carried accumulator %acc.04; the
; FMA-INST checks expect the 231 packed-double form on a ymm register. The
; function name is matched with a -LABEL directive so the instruction check
; cannot be satisfied by another function's output.
; CHECK-FMA-INST-LABEL: fma3_select231pdY:
; CHECK-FMA-INST: vfmadd231pd %ymm
define <4 x double> @fma3_select231pdY(<4 x double> %x, <4 x double> %y) #0 {
entry:
  br label %while.body
while.body:                                       ; preds = %entry, %while.body
  %acc.04 = phi <4 x double> [ zeroinitializer, %entry ], [ %add, %while.body ]
  %add = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %acc.04)
  %vecext = extractelement <4 x double> %add, i32 0
  %cmp = fcmp oeq double %vecext, 0.000000e+00
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret <4 x double> %add
}
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||
declare double @llvm.fma.f64(double, double, double) nounwind readnone
|
||||
declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
|
||||
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
@ -1,150 +0,0 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s
|
||||
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmadd.ss intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fmadd_ss:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmadd213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmadd.ps intrinsic (128-bit packed float).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fmadd_ps:
  ; CHECK: fmadd213ps
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmadd.ps.256 intrinsic (256-bit packed float).
; The check expects the 213 form with a memory operand addressed through an
; %r register, followed by a %ymm register operand. The label check pins the
; match to this function's output.
define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK-LABEL: test_x86_fmadd_ps_y:
  ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
  ret <8 x float> %res
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmadd.ss intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fnmadd_ss:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmadd213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmadd.ps intrinsic (128-bit packed float).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fnmadd_ps:
  ; CHECK: fnmadd213ps
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmadd.ps.256 intrinsic (256-bit packed float).
; The check expects the 213 form with a memory operand addressed through an
; %r register, followed by a %ymm register operand. The label check pins the
; match to this function's output.
define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK-LABEL: test_x86_fnmadd_ps_y:
  ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
  ret <8 x float> %res
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmsub.ss intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fmsub_ss:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmsub213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmsub.ps intrinsic (128-bit packed float).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fmsub_ps:
  ; CHECK: fmsub213ps
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmsub.ss intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fnmsub_ss:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmsub213ss (%r8), [[XMM1]], [[XMM0]]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmsub.ps intrinsic (128-bit packed float).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_x86_fnmsub_ps:
  ; CHECK: fnmsub213ps
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
;;;;
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmadd.sd intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fmadd_sd:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmadd213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmadd.pd intrinsic (128-bit packed double).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fmadd_pd:
  ; CHECK: fmadd213pd
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmadd.sd intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fnmadd_sd:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmadd213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmadd.pd intrinsic (128-bit packed double).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fnmadd_pd:
  ; CHECK: fnmadd213pd
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmsub.sd intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fmsub_sd:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fmsub213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfmsub.pd intrinsic (128-bit packed double).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fmsub_pd:
  ; CHECK: fmsub213pd
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmsub.sd intrinsic.
; The checks expect two operands to be loaded into %xmm registers from (%rcx)
; and (%rdx) (in either order) and the remaining operand to be folded from
; memory at (%r8) into the 213 form. The label check pins the matches to this
; function's output.
define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fnmsub_sd:
  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
  ; CHECK: fnmsub213sd (%r8), [[XMM1]], [[XMM0]]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Exercises the @llvm.x86.fma.vfnmsub.pd intrinsic (128-bit packed double).
; Only the 213-form mnemonic is checked; operands are left unconstrained.
; The label check pins the match to this function's output.
define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_x86_fnmsub_pd:
  ; CHECK: fnmsub213pd
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
  ret <2 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
Loading…
Reference in New Issue