forked from OSchip/llvm-project
[X86] Add AVX512 load opcodes and a couple of AVX load opcodes to X86InstrInfo::areLoadsFromSameBasePtr.
llvm-svn: 275765
This commit is contained in:
parent
650a15e2b3
commit
f7a06c29bc
|
@ -6728,6 +6728,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
|||
case X86::MOVAPSrm:
|
||||
case X86::MOVUPSrm:
|
||||
case X86::MOVAPDrm:
|
||||
case X86::MOVUPDrm:
|
||||
case X86::MOVDQArm:
|
||||
case X86::MOVDQUrm:
|
||||
// AVX load instructions
|
||||
|
@ -6738,13 +6739,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
|||
case X86::VMOVAPSrm:
|
||||
case X86::VMOVUPSrm:
|
||||
case X86::VMOVAPDrm:
|
||||
case X86::VMOVUPDrm:
|
||||
case X86::VMOVDQArm:
|
||||
case X86::VMOVDQUrm:
|
||||
case X86::VMOVAPSYrm:
|
||||
case X86::VMOVUPSYrm:
|
||||
case X86::VMOVAPDYrm:
|
||||
case X86::VMOVUPDYrm:
|
||||
case X86::VMOVDQAYrm:
|
||||
case X86::VMOVDQUYrm:
|
||||
// AVX512 load instructions
|
||||
case X86::VMOVSSZrm:
|
||||
case X86::VMOVSDZrm:
|
||||
case X86::VMOVAPSZ128rm:
|
||||
case X86::VMOVUPSZ128rm:
|
||||
case X86::VMOVAPDZ128rm:
|
||||
case X86::VMOVUPDZ128rm:
|
||||
case X86::VMOVDQU8Z128rm:
|
||||
case X86::VMOVDQU16Z128rm:
|
||||
case X86::VMOVDQA32Z128rm:
|
||||
case X86::VMOVDQU32Z128rm:
|
||||
case X86::VMOVDQA64Z128rm:
|
||||
case X86::VMOVDQU64Z128rm:
|
||||
case X86::VMOVAPSZ256rm:
|
||||
case X86::VMOVUPSZ256rm:
|
||||
case X86::VMOVAPDZ256rm:
|
||||
case X86::VMOVUPDZ256rm:
|
||||
case X86::VMOVDQU8Z256rm:
|
||||
case X86::VMOVDQU16Z256rm:
|
||||
case X86::VMOVDQA32Z256rm:
|
||||
case X86::VMOVDQU32Z256rm:
|
||||
case X86::VMOVDQA64Z256rm:
|
||||
case X86::VMOVDQU64Z256rm:
|
||||
case X86::VMOVAPSZrm:
|
||||
case X86::VMOVUPSZrm:
|
||||
case X86::VMOVAPDZrm:
|
||||
case X86::VMOVUPDZrm:
|
||||
case X86::VMOVDQU8Zrm:
|
||||
case X86::VMOVDQU16Zrm:
|
||||
case X86::VMOVDQA32Zrm:
|
||||
case X86::VMOVDQU32Zrm:
|
||||
case X86::VMOVDQA64Zrm:
|
||||
case X86::VMOVDQU64Zrm:
|
||||
case X86::KMOVBkm:
|
||||
case X86::KMOVWkm:
|
||||
case X86::KMOVDkm:
|
||||
case X86::KMOVQkm:
|
||||
break;
|
||||
}
|
||||
switch (Opc2) {
|
||||
|
@ -6765,6 +6805,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
|||
case X86::MOVAPSrm:
|
||||
case X86::MOVUPSrm:
|
||||
case X86::MOVAPDrm:
|
||||
case X86::MOVUPDrm:
|
||||
case X86::MOVDQArm:
|
||||
case X86::MOVDQUrm:
|
||||
// AVX load instructions
|
||||
|
@ -6775,13 +6816,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
|||
case X86::VMOVAPSrm:
|
||||
case X86::VMOVUPSrm:
|
||||
case X86::VMOVAPDrm:
|
||||
case X86::VMOVUPDrm:
|
||||
case X86::VMOVDQArm:
|
||||
case X86::VMOVDQUrm:
|
||||
case X86::VMOVAPSYrm:
|
||||
case X86::VMOVUPSYrm:
|
||||
case X86::VMOVAPDYrm:
|
||||
case X86::VMOVUPDYrm:
|
||||
case X86::VMOVDQAYrm:
|
||||
case X86::VMOVDQUYrm:
|
||||
// AVX512 load instructions
|
||||
case X86::VMOVSSZrm:
|
||||
case X86::VMOVSDZrm:
|
||||
case X86::VMOVAPSZ128rm:
|
||||
case X86::VMOVUPSZ128rm:
|
||||
case X86::VMOVAPDZ128rm:
|
||||
case X86::VMOVUPDZ128rm:
|
||||
case X86::VMOVDQU8Z128rm:
|
||||
case X86::VMOVDQU16Z128rm:
|
||||
case X86::VMOVDQA32Z128rm:
|
||||
case X86::VMOVDQU32Z128rm:
|
||||
case X86::VMOVDQA64Z128rm:
|
||||
case X86::VMOVDQU64Z128rm:
|
||||
case X86::VMOVAPSZ256rm:
|
||||
case X86::VMOVUPSZ256rm:
|
||||
case X86::VMOVAPDZ256rm:
|
||||
case X86::VMOVUPDZ256rm:
|
||||
case X86::VMOVDQU8Z256rm:
|
||||
case X86::VMOVDQU16Z256rm:
|
||||
case X86::VMOVDQA32Z256rm:
|
||||
case X86::VMOVDQU32Z256rm:
|
||||
case X86::VMOVDQA64Z256rm:
|
||||
case X86::VMOVDQU64Z256rm:
|
||||
case X86::VMOVAPSZrm:
|
||||
case X86::VMOVUPSZrm:
|
||||
case X86::VMOVAPDZrm:
|
||||
case X86::VMOVUPDZrm:
|
||||
case X86::VMOVDQU8Zrm:
|
||||
case X86::VMOVDQU16Zrm:
|
||||
case X86::VMOVDQA32Zrm:
|
||||
case X86::VMOVDQU32Zrm:
|
||||
case X86::VMOVDQA64Zrm:
|
||||
case X86::VMOVDQU64Zrm:
|
||||
case X86::KMOVBkm:
|
||||
case X86::KMOVWkm:
|
||||
case X86::KMOVDkm:
|
||||
case X86::KMOVQkm:
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -517,20 +517,20 @@ define <64 x i8> @test16(i64 %x) {
|
|||
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; KNL-NEXT: kmovw (%rsp), %k1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
|
||||
; KNL-NEXT: movl $1, %eax
|
||||
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
|
||||
|
@ -575,10 +575,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
|
|||
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||
; KNL-NEXT: kmovw (%rsp), %k1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
|
||||
; KNL-NEXT: xorl %eax, %eax
|
||||
|
@ -591,10 +591,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
|
|||
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; KNL-NEXT: movq %rbp, %rsp
|
||||
|
@ -1319,10 +1319,10 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
;
|
||||
; SKX-LABEL: ktest_2:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vmovups 64(%rdi), %zmm2
|
||||
; SKX-NEXT: vmovups (%rdi), %zmm3
|
||||
; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1
|
||||
; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2
|
||||
; SKX-NEXT: vmovups (%rdi), %zmm2
|
||||
; SKX-NEXT: vmovups 64(%rdi), %zmm3
|
||||
; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1
|
||||
; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2
|
||||
; SKX-NEXT: kunpckwd %k1, %k2, %k0
|
||||
; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
|
||||
; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
|
||||
|
@ -1449,11 +1449,11 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) {
|
|||
; KNL-LABEL: load_32i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: kmovw (%rdi), %k1
|
||||
; KNL-NEXT: kmovw 2(%rdi), %k2
|
||||
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: kmovw 2(%rdi), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
|
@ -1471,18 +1471,18 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) {
|
|||
; KNL-LABEL: load_64i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: kmovw (%rdi), %k1
|
||||
; KNL-NEXT: kmovw 2(%rdi), %k2
|
||||
; KNL-NEXT: kmovw 4(%rdi), %k3
|
||||
; KNL-NEXT: kmovw 6(%rdi), %k4
|
||||
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: kmovw 2(%rdi), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; KNL-NEXT: kmovw 4(%rdi), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; KNL-NEXT: kmovw 6(%rdi), %k1
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k4} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
|
|
|
@ -3738,11 +3738,11 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
|
|||
; AVX512-LABEL: load_sext_32i1_to_32i8:
|
||||
; AVX512: # BB#0: # %entry
|
||||
; AVX512-NEXT: kmovw (%rdi), %k1
|
||||
; AVX512-NEXT: kmovw 2(%rdi), %k2
|
||||
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
|
||||
; AVX512-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512-NEXT: kmovw 2(%rdi), %k1
|
||||
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue