[X86] Add AVX512 load opcodes and a couple of AVX load opcodes to X86InstrInfo::areLoadsFromSameBasePtr.

llvm-svn: 275765
This commit is contained in:
Craig Topper 2016-07-18 06:14:43 +00:00
parent 650a15e2b3
commit f7a06c29bc
3 changed files with 102 additions and 22 deletions

View File

@ -6728,6 +6728,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
// AVX load instructions
@ -6738,13 +6739,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
// AVX512 load instructions
case X86::VMOVSSZrm:
case X86::VMOVSDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
case X86::VMOVAPDZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVDQU8Z128rm:
case X86::VMOVDQU16Z128rm:
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQU32Z128rm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQU64Z128rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPSZ256rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU16Z256rm:
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQU32Z256rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU64Z256rm:
case X86::VMOVAPSZrm:
case X86::VMOVUPSZrm:
case X86::VMOVAPDZrm:
case X86::VMOVUPDZrm:
case X86::VMOVDQU8Zrm:
case X86::VMOVDQU16Zrm:
case X86::VMOVDQA32Zrm:
case X86::VMOVDQU32Zrm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
case X86::KMOVBkm:
case X86::KMOVWkm:
case X86::KMOVDkm:
case X86::KMOVQkm:
break;
}
switch (Opc2) {
@ -6765,6 +6805,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
// AVX load instructions
@ -6775,13 +6816,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
// AVX512 load instructions
case X86::VMOVSSZrm:
case X86::VMOVSDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
case X86::VMOVAPDZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVDQU8Z128rm:
case X86::VMOVDQU16Z128rm:
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQU32Z128rm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQU64Z128rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPSZ256rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU16Z256rm:
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQU32Z256rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU64Z256rm:
case X86::VMOVAPSZrm:
case X86::VMOVUPSZrm:
case X86::VMOVAPDZrm:
case X86::VMOVUPDZrm:
case X86::VMOVDQU8Zrm:
case X86::VMOVDQU16Zrm:
case X86::VMOVDQA32Zrm:
case X86::VMOVDQU32Zrm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
case X86::KMOVBkm:
case X86::KMOVWkm:
case X86::KMOVDkm:
case X86::KMOVQkm:
break;
}

View File

@ -517,20 +517,20 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
; KNL-NEXT: movl $1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
@ -575,10 +575,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; KNL-NEXT: xorl %eax, %eax
@ -591,10 +591,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: movq %rbp, %rsp
@ -1319,10 +1319,10 @@ define void @ktest_2(<32 x float> %in, float * %base) {
;
; SKX-LABEL: ktest_2:
; SKX: ## BB#0:
; SKX-NEXT: vmovups 64(%rdi), %zmm2
; SKX-NEXT: vmovups (%rdi), %zmm3
; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1
; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2
; SKX-NEXT: vmovups (%rdi), %zmm2
; SKX-NEXT: vmovups 64(%rdi), %zmm3
; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1
; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k0
; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
@ -1449,11 +1449,11 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) {
; KNL-LABEL: load_32i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: kmovw 2(%rdi), %k2
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: kmovw 2(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdw %zmm1, %ymm1
; KNL-NEXT: retq
;
@ -1471,18 +1471,18 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) {
; KNL-LABEL: load_64i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: kmovw 2(%rdi), %k2
; KNL-NEXT: kmovw 4(%rdi), %k3
; KNL-NEXT: kmovw 6(%rdi), %k4
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw 2(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: kmovw 4(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: kmovw 6(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k4} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: retq

View File

@ -3738,11 +3738,11 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; AVX512-LABEL: load_sext_32i1_to_32i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
; AVX512-NEXT: kmovw 2(%rdi), %k2
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm1, %xmm1
; AVX512-NEXT: kmovw 2(%rdi), %k1
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq