forked from OSchip/llvm-project
[X86] Add broadcast load unfolding tests for FMA instructions.
llvm-svn: 371322
This commit is contained in:
parent
eacb2c2c97
commit
a461c26dd8
|
@ -1646,3 +1646,421 @@ bb1: ; preds = %bb1, %bb
|
|||
bb9: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
; In-place x*x + 2.0 over the first 1024 floats of %arg, four lanes at a time.
; Both FP operations carry `contract`, so they may be fused. The assertions
; below expect one vfmadd213ps per iteration whose addend is a memory operand
; (presumably the splat of 2.0 -- the pattern hides the actual address).
define void @bcast_unfold_fma213_v4f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v4f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB48_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm0
; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB48_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (4 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <4 x float>*
  %x = load <4 x float>, <4 x float>* %ld.ptr, align 4
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <4 x float> %x, %x
  %res = fadd contract <4 x float> %sq, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <4 x float>*
  store <4 x float> %res, <4 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 4
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; In-place x + x*2.0 over the first 1024 floats of %arg, four lanes at a time.
; The multiply by the constant feeds a `contract` add with the original value,
; and the assertions below expect a vfmadd231ps whose multiplicand is a memory
; operand (presumably the splat of 2.0 -- the pattern hides the address).
define void @bcast_unfold_fma231_v4f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v4f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB49_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB49_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (4 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <4 x float>*
  %x = load <4 x float>, <4 x float>* %ld.ptr, align 4
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %res = fadd contract <4 x float> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <4 x float>*
  store <4 x float> %res, <4 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 4
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
||||
; 256-bit variant: in-place x*x + 2.0 over the first 1024 floats of %arg,
; eight lanes at a time. The assertions below expect a ymm vfmadd213ps with a
; memory addend (presumably the 2.0 splat) and a vzeroupper before returning.
define void @bcast_unfold_fma213_v8f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v8f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB50_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm0
; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB50_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (8 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <8 x float>*
  %x = load <8 x float>, <8 x float>* %ld.ptr, align 4
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <8 x float> %x, %x
  %res = fadd contract <8 x float> %sq, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <8 x float>*
  store <8 x float> %res, <8 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 8
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; 256-bit variant: in-place x + x*2.0 over the first 1024 floats of %arg,
; eight lanes at a time. The assertions below expect a ymm vfmadd231ps with a
; memory multiplicand (presumably the 2.0 splat) and a vzeroupper before
; returning.
define void @bcast_unfold_fma231_v8f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v8f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB51_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB51_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (8 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <8 x float>*
  %x = load <8 x float>, <8 x float>* %ld.ptr, align 4
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <8 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %res = fadd contract <8 x float> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <8 x float>*
  store <8 x float> %res, <8 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 8
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
||||
; 512-bit variant: in-place x*x + 2.0 over the first 1024 floats of %arg,
; sixteen lanes at a time. The assertions below expect a zmm vfmadd213ps with
; a memory addend (presumably the 2.0 splat) and a vzeroupper before returning.
define void @bcast_unfold_fma213_v16f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v16f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB52_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm0
; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm0 * zmm0) + mem
; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB52_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (16 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <16 x float>*
  %x = load <16 x float>, <16 x float>* %ld.ptr, align 4
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <16 x float> %x, %x
  %res = fadd contract <16 x float> %sq, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <16 x float>*
  store <16 x float> %res, <16 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 16
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; 512-bit variant: in-place x + x*2.0 over the first 1024 floats of %arg,
; sixteen lanes at a time. The assertions below expect a zmm vfmadd231ps with
; a memory multiplicand (presumably the 2.0 splat) and a vzeroupper before
; returning.
define void @bcast_unfold_fma231_v16f32(float* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v16f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB53_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB53_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (16 floats) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds float, float* %arg, i64 %idx
  %ld.ptr = bitcast float* %elt.ptr to <16 x float>*
  %x = load <16 x float>, <16 x float>* %ld.ptr, align 4
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <16 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %res = fadd contract <16 x float> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast float* %elt.ptr to <16 x float>*
  store <16 x float> %res, <16 x float>* %st.ptr, align 4
  %idx.next = add i64 %idx, 16
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
||||
; 128-bit double variant: in-place x*x + 2.0 over the first 1024 doubles of
; %arg, two lanes at a time. Unlike a memory-operand form, the assertions
; below expect the [2.0, 2.0] constant to be loaded into xmm0 ahead of the
; loop and used by vfmadd213pd as a register addend.
define void @bcast_unfold_fma213_v2f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v2f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB54_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
; CHECK-NEXT: vfmadd213pd {{.*#+}} xmm1 = (xmm1 * xmm1) + xmm0
; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB54_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (2 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <2 x double>*
  ; The load uses the element's natural 8-byte alignment, matching the store
  ; to the same address below. It is still below the 16-byte vector size, so
  ; an unaligned move (vmovupd) remains the expected instruction.
  %x = load <2 x double>, <2 x double>* %ld.ptr, align 8
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <2 x double> %x, %x
  %res = fadd contract <2 x double> %sq, <double 2.000000e+00, double 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <2 x double>*
  store <2 x double> %res, <2 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 2
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; 128-bit double variant: in-place x + x*2.0 over the first 1024 doubles of
; %arg, two lanes at a time. The assertions below expect the [2.0, 2.0]
; constant to be loaded into xmm0 ahead of the loop and used by vfmadd231pd
; as a register multiplicand.
define void @bcast_unfold_fma231_v2f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v2f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB55_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm1 = (xmm1 * xmm0) + xmm1
; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB55_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (2 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <2 x double>*
  %x = load <2 x double>, <2 x double>* %ld.ptr, align 8
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
  %res = fadd contract <2 x double> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <2 x double>*
  store <2 x double> %res, <2 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 2
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
||||
; 256-bit double variant: in-place x*x + 2.0 over the first 1024 doubles of
; %arg, four lanes at a time. The assertions below expect a ymm vfmadd213pd
; with a memory addend (presumably the 2.0 splat) and a vzeroupper before
; returning.
define void @bcast_unfold_fma213_v4f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v4f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB56_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm0
; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB56_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (4 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <4 x double>*
  %x = load <4 x double>, <4 x double>* %ld.ptr, align 8
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <4 x double> %x, %x
  %res = fadd contract <4 x double> %sq, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <4 x double>*
  store <4 x double> %res, <4 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 4
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; 256-bit double variant: in-place x + x*2.0 over the first 1024 doubles of
; %arg, four lanes at a time. The assertions below expect a ymm vfmadd231pd
; with a memory multiplicand (presumably the 2.0 splat) and a vzeroupper
; before returning.
define void @bcast_unfold_fma231_v4f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v4f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB57_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB57_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (4 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <4 x double>*
  %x = load <4 x double>, <4 x double>* %ld.ptr, align 8
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <4 x double> %x, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
  %res = fadd contract <4 x double> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <4 x double>*
  store <4 x double> %res, <4 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 4
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
||||
; 512-bit double variant: in-place x*x + 2.0 over the first 1024 doubles of
; %arg, eight lanes at a time. The assertions below expect a zmm vfmadd213pd
; with a memory addend (presumably the 2.0 splat) and a vzeroupper before
; returning.
define void @bcast_unfold_fma213_v8f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma213_v8f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB58_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm0
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm0 * zmm0) + mem
; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB58_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb2

bb2: ; preds = %bb2, %bb
  ; Element index: starts at 0 and advances by one vector (8 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb2 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <8 x double>*
  %x = load <8 x double>, <8 x double>* %ld.ptr, align 8
  ; x*x followed by + 2.0; the fmul/fadd pair is what may be contracted.
  %sq = fmul contract <8 x double> %x, %x
  %res = fadd contract <8 x double> %sq, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <8 x double>*
  store <8 x double> %res, <8 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 8
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb11, label %bb2

bb11: ; preds = %bb2
  ret void
}
|
||||
|
||||
; 512-bit double variant: in-place x + x*2.0 over the first 1024 doubles of
; %arg, eight lanes at a time. The assertions below expect a zmm vfmadd231pd
; with a memory multiplicand (presumably the 2.0 splat) and a vzeroupper
; before returning.
define void @bcast_unfold_fma231_v8f64(double* %arg) {
; CHECK-LABEL: bcast_unfold_fma231_v8f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB59_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB59_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb1

bb1: ; preds = %bb1, %bb
  ; Element index: starts at 0 and advances by one vector (8 doubles) per trip.
  %idx = phi i64 [ 0, %bb ], [ %idx.next, %bb1 ]
  %elt.ptr = getelementptr inbounds double, double* %arg, i64 %idx
  %ld.ptr = bitcast double* %elt.ptr to <8 x double>*
  %x = load <8 x double>, <8 x double>* %ld.ptr, align 8
  ; x*2.0 is added back onto x; the loaded value is both factor and addend.
  %scaled = fmul contract <8 x double> %x, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
  %res = fadd contract <8 x double> %x, %scaled
  ; The result overwrites the vector that was just read.
  %st.ptr = bitcast double* %elt.ptr to <8 x double>*
  store <8 x double> %res, <8 x double>* %st.ptr, align 8
  %idx.next = add i64 %idx, 8
  ; Leave the loop once exactly 1024 elements have been processed.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10: ; preds = %bb1
  ret void
}
|
||||
|
|
Loading…
Reference in New Issue