forked from OSchip/llvm-project
[X86] Add broadcast load unfolding support for VPTESTMD/Q and VPTESTNMD/Q.
llvm-svn: 373138
This commit is contained in:
parent
82a707e941
commit
8b5ad3d16e
|
@ -5374,6 +5374,18 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
|
|||
{ X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D },
|
||||
{ X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
|
||||
{ X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
|
||||
{ X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D },
|
||||
{ X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
|
||||
{ X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
|
||||
{ X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D },
|
||||
|
|
|
@ -4483,3 +4483,153 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) {
|
|||
12: ; preds = %2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Regression test: loop over i32 data that tests each lane against a splat
; constant (v4i32, "mask AND value is non-zero" flavour).
; Each iteration loads <4 x i32> from %arg, ANDs it with splat(2), and stores
; back a vector in which lanes with a non-zero masked value are replaced by 3.
; The expected assembly below materializes the [2,2,2,2] operand once with
; vpbroadcastd ahead of the loop label and uses the register-register form of
; vptestmd inside the loop.
define void @bcast_unfold_ptestm_v4i32(i32* %arg) {
|
||||
; CHECK-LABEL: bcast_unfold_ptestm_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB127_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm1
|
||||
; CHECK-NEXT: vptestmd %xmm0, %xmm1, %k1
|
||||
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB127_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
; %tmp is the element index: 0 on entry, %tmp8 on the back edge.
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
|
||||
%tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp
|
||||
%tmp3 = bitcast i32* %tmp2 to <4 x i32>*
|
||||
%tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4
|
||||
; Mask every lane with the splat constant 2.
%tmp4b = and <4 x i32> %tmp4, <i32 2, i32 2, i32 2, i32 2>
|
||||
; Lanes whose masked value is non-zero are the ones to replace.
%tmp5 = icmp ne <4 x i32> %tmp4b, zeroinitializer
|
||||
; Replaced lanes become 3; all other lanes keep the loaded value.
%tmp6 = select <4 x i1> %tmp5, <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> %tmp4
|
||||
%tmp7 = bitcast i32* %tmp2 to <4 x i32>*
|
||||
store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4
|
||||
; Advance by 4 elements, i.e. one <4 x i32> vector per iteration.
%tmp8 = add i64 %tmp, 4
|
||||
; Exit once the index reaches 1024.
%tmp9 = icmp eq i64 %tmp8, 1024
|
||||
br i1 %tmp9, label %bb10, label %bb1
|
||||
|
||||
bb10: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Regression test: loop over i32 data that tests each lane against a splat
; constant (v4i32, "mask AND value is zero" flavour).
; Each iteration loads <4 x i32> from %arg, ANDs it with splat(2), and stores
; back a vector in which lanes with a zero masked value are replaced by 3.
; The expected assembly below materializes the [2,2,2,2] operand once with
; vpbroadcastd ahead of the loop label and uses the register-register form of
; vptestnmd inside the loop.
define void @bcast_unfold_ptestnm_v4i32(i32* %arg) {
|
||||
; CHECK-LABEL: bcast_unfold_ptestnm_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB128_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm1
|
||||
; CHECK-NEXT: vptestnmd %xmm0, %xmm1, %k1
|
||||
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB128_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
; %tmp is the element index: 0 on entry, %tmp8 on the back edge.
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
|
||||
%tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp
|
||||
%tmp3 = bitcast i32* %tmp2 to <4 x i32>*
|
||||
%tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4
|
||||
; Mask every lane with the splat constant 2.
%tmp4b = and <4 x i32> %tmp4, <i32 2, i32 2, i32 2, i32 2>
|
||||
; Lanes whose masked value is zero are the ones to replace.
%tmp5 = icmp eq <4 x i32> %tmp4b, zeroinitializer
|
||||
; Replaced lanes become 3; all other lanes keep the loaded value.
%tmp6 = select <4 x i1> %tmp5, <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> %tmp4
|
||||
%tmp7 = bitcast i32* %tmp2 to <4 x i32>*
|
||||
store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4
|
||||
; Advance by 4 elements, i.e. one <4 x i32> vector per iteration.
%tmp8 = add i64 %tmp, 4
|
||||
; Exit once the index reaches 1024.
%tmp9 = icmp eq i64 %tmp8, 1024
|
||||
br i1 %tmp9, label %bb10, label %bb1
|
||||
|
||||
bb10: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Regression test: loop over i64 data that tests each lane against a splat
; constant (v4i64, "mask AND value is non-zero" flavour).
; Each iteration loads <4 x i64> from %arg, ANDs it with splat(2), and stores
; back a vector in which lanes with a non-zero masked value are replaced by 3.
; The expected assembly below materializes the [2,2,2,2] operand once with
; vpbroadcastq ahead of the loop label, uses the register-register form of
; vptestmq inside the loop, and ends with vzeroupper before the return.
define void @bcast_unfold_ptestm_v4i64(i64* %arg) {
|
||||
; CHECK-LABEL: bcast_unfold_ptestm_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB129_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm1
|
||||
; CHECK-NEXT: vptestmq %ymm0, %ymm1, %k1
|
||||
; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB129_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
; %tmp is the element index: 0 on entry, %tmp8 on the back edge.
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
|
||||
%tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp
|
||||
%tmp3 = bitcast i64* %tmp2 to <4 x i64>*
|
||||
%tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8
|
||||
; Mask every lane with the splat constant 2.
%tmp4b = and <4 x i64> %tmp4, <i64 2, i64 2, i64 2, i64 2>
|
||||
; Lanes whose masked value is non-zero are the ones to replace.
%tmp5 = icmp ne <4 x i64> %tmp4b, zeroinitializer
|
||||
; Replaced lanes become 3; all other lanes keep the loaded value.
%tmp6 = select <4 x i1> %tmp5, <4 x i64> <i64 3, i64 3, i64 3, i64 3>, <4 x i64> %tmp4
|
||||
%tmp7 = bitcast i64* %tmp2 to <4 x i64>*
|
||||
store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8
|
||||
; Advance by 4 elements, i.e. one <4 x i64> vector per iteration.
%tmp8 = add i64 %tmp, 4
|
||||
; Exit once the index reaches 1024.
%tmp9 = icmp eq i64 %tmp8, 1024
|
||||
br i1 %tmp9, label %bb10, label %bb1
|
||||
|
||||
bb10: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Regression test: loop over i64 data that tests each lane against a splat
; constant (v4i64, "mask AND value is zero" flavour).
; Each iteration loads <4 x i64> from %arg, ANDs it with splat(2), and stores
; back a vector in which lanes with a zero masked value are replaced by 3.
; The expected assembly below materializes the [2,2,2,2] operand once with
; vpbroadcastq ahead of the loop label, uses the register-register form of
; vptestnmq inside the loop, and ends with vzeroupper before the return.
define void @bcast_unfold_ptestnm_v4i64(i64* %arg) {
|
||||
; CHECK-LABEL: bcast_unfold_ptestnm_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB130_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm1
|
||||
; CHECK-NEXT: vptestnmq %ymm0, %ymm1, %k1
|
||||
; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB130_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
; %tmp is the element index: 0 on entry, %tmp8 on the back edge.
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
|
||||
%tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp
|
||||
%tmp3 = bitcast i64* %tmp2 to <4 x i64>*
|
||||
%tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8
|
||||
; Mask every lane with the splat constant 2.
%tmp4b = and <4 x i64> %tmp4, <i64 2, i64 2, i64 2, i64 2>
|
||||
; Lanes whose masked value is zero are the ones to replace.
%tmp5 = icmp eq <4 x i64> %tmp4b, zeroinitializer
|
||||
; Replaced lanes become 3; all other lanes keep the loaded value.
%tmp6 = select <4 x i1> %tmp5, <4 x i64> <i64 3, i64 3, i64 3, i64 3>, <4 x i64> %tmp4
|
||||
%tmp7 = bitcast i64* %tmp2 to <4 x i64>*
|
||||
store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8
|
||||
; Advance by 4 elements, i.e. one <4 x i64> vector per iteration.
%tmp8 = add i64 %tmp, 4
|
||||
; Exit once the index reaches 1024.
%tmp9 = icmp eq i64 %tmp8, 1024
|
||||
br i1 %tmp9, label %bb10, label %bb1
|
||||
|
||||
bb10: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue