forked from OSchip/llvm-project
[X86] Add broadcast load unfold support for smin/umin/smax/umax.
llvm-svn: 371366
This commit is contained in:
parent
68b2e1973f
commit
8c2ab1c4cb
|
@@ -5306,6 +5306,30 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
|
|||
{ X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D },
|
||||
{ X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D },
|
||||
{ X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D },
|
||||
{ X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D },
|
||||
{ X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q },
|
||||
{ X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q },
|
||||
{ X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q },
|
||||
{ X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D },
|
||||
{ X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D },
|
||||
{ X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D },
|
||||
|
|
|
@@ -2507,12 +2507,12 @@ define void @bcast_unfold_smin_v4i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smin_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB72_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
|
||||
; CHECK-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %xmm0, %xmm1
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB72_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2541,12 +2541,12 @@ define void @bcast_unfold_smin_v8i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smin_v8i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB73_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB73_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2576,12 +2576,12 @@ define void @bcast_unfold_smin_v16i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smin_v16i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB74_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB74_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2645,12 +2645,12 @@ define void @bcast_unfold_smin_v4i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smin_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB76_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB76_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2680,12 +2680,12 @@ define void @bcast_unfold_smin_v8i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smin_v8i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB77_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB77_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2715,12 +2715,12 @@ define void @bcast_unfold_smax_v4i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smax_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB78_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
|
||||
; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %xmm0, %xmm1
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB78_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2749,12 +2749,12 @@ define void @bcast_unfold_smax_v8i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smax_v8i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB79_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB79_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2784,12 +2784,12 @@ define void @bcast_unfold_smax_v16i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smax_v16i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB80_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB80_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2853,12 +2853,12 @@ define void @bcast_unfold_smax_v4i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smax_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB82_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB82_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2888,12 +2888,12 @@ define void @bcast_unfold_smax_v8i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_smax_v8i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB83_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB83_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2923,12 +2923,12 @@ define void @bcast_unfold_umin_v4i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umin_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB84_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
|
||||
; CHECK-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminud 4096(%rdi,%rax), %xmm0, %xmm1
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB84_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2957,12 +2957,12 @@ define void @bcast_unfold_umin_v8i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umin_v8i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB85_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminud 4096(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB85_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -2992,12 +2992,12 @@ define void @bcast_unfold_umin_v16i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umin_v16i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB86_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminud 4096(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB86_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3061,12 +3061,12 @@ define void @bcast_unfold_umin_v4i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umin_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB88_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB88_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3096,12 +3096,12 @@ define void @bcast_unfold_umin_v8i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umin_v8i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB89_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB89_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3131,12 +3131,12 @@ define void @bcast_unfold_umax_v4i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umax_v4i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB90_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
|
||||
; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %xmm0, %xmm1
|
||||
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: jne .LBB90_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3165,12 +3165,12 @@ define void @bcast_unfold_umax_v8i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umax_v8i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB91_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB91_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3200,12 +3200,12 @@ define void @bcast_unfold_umax_v16i32(i32* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umax_v16i32:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB92_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB92_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3269,12 +3269,12 @@ define void @bcast_unfold_umax_v4i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umax_v4i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB94_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
|
||||
; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %ymm0, %ymm1
|
||||
; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB94_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
@@ -3304,12 +3304,12 @@ define void @bcast_unfold_umax_v8i64(i64* %arg) {
|
|||
; CHECK-LABEL: bcast_unfold_umax_v8i64:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
|
||||
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB95_1: # %bb1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
|
||||
; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $64, %rax
|
||||
; CHECK-NEXT: jne .LBB95_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb10
|
||||
|
|
Loading…
Reference in New Issue