From f7a06c29bc80219496adb3b29b53206d912a7fa4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 18 Jul 2016 06:14:43 +0000 Subject: [PATCH] [X86] Add AVX512 load opcodes and a couple AVX load opcodes to X86InstrInfo::areLoadsFromSameBasePtr. llvm-svn: 275765 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 80 +++++++++++++++++++++++++ llvm/test/CodeGen/X86/avx512-mask-op.ll | 40 ++++++------- llvm/test/CodeGen/X86/vector-sext.ll | 4 +- 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f7124e42894e..fed0f6720f9e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6728,6 +6728,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPSrm: case X86::MOVUPSrm: case X86::MOVAPDrm: + case X86::MOVUPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: // AVX load instructions @@ -6738,13 +6739,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::VMOVAPSrm: case X86::VMOVUPSrm: case X86::VMOVAPDrm: + case X86::VMOVUPDrm: case X86::VMOVDQArm: case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: + case X86::VMOVUPDYrm: case X86::VMOVDQAYrm: case X86::VMOVDQUYrm: + // AVX512 load instructions + case X86::VMOVSSZrm: + case X86::VMOVSDZrm: + case X86::VMOVAPSZ128rm: + case X86::VMOVUPSZ128rm: + case X86::VMOVAPDZ128rm: + case X86::VMOVUPDZ128rm: + case X86::VMOVDQU8Z128rm: + case X86::VMOVDQU16Z128rm: + case X86::VMOVDQA32Z128rm: + case X86::VMOVDQU32Z128rm: + case X86::VMOVDQA64Z128rm: + case X86::VMOVDQU64Z128rm: + case X86::VMOVAPSZ256rm: + case X86::VMOVUPSZ256rm: + case X86::VMOVAPDZ256rm: + case X86::VMOVUPDZ256rm: + case X86::VMOVDQU8Z256rm: + case X86::VMOVDQU16Z256rm: + case X86::VMOVDQA32Z256rm: + case X86::VMOVDQU32Z256rm: + case X86::VMOVDQA64Z256rm: + case X86::VMOVDQU64Z256rm: + case X86::VMOVAPSZrm: + case X86::VMOVUPSZrm: + case X86::VMOVAPDZrm: + case X86::VMOVUPDZrm: + case X86::VMOVDQU8Zrm: + case X86::VMOVDQU16Zrm: + case X86::VMOVDQA32Zrm: + case X86::VMOVDQU32Zrm: + case X86::VMOVDQA64Zrm: + case X86::VMOVDQU64Zrm: + case X86::KMOVBkm: + case X86::KMOVWkm: + case X86::KMOVDkm: + case X86::KMOVQkm: break; } switch (Opc2) { @@ -6765,6 +6805,7 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPSrm: case X86::MOVUPSrm: case X86::MOVAPDrm: + case X86::MOVUPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: // AVX load instructions @@ -6775,13 +6816,52 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::VMOVAPSrm: case X86::VMOVUPSrm: case X86::VMOVAPDrm: + case X86::VMOVUPDrm: case X86::VMOVDQArm: case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: + case X86::VMOVUPDYrm: case X86::VMOVDQAYrm: case X86::VMOVDQUYrm: + // AVX512 load instructions + case X86::VMOVSSZrm: + case X86::VMOVSDZrm: + case X86::VMOVAPSZ128rm: + case X86::VMOVUPSZ128rm: + case X86::VMOVAPDZ128rm: + case X86::VMOVUPDZ128rm: + case X86::VMOVDQU8Z128rm: + case X86::VMOVDQU16Z128rm: + case X86::VMOVDQA32Z128rm: + case X86::VMOVDQU32Z128rm: + case X86::VMOVDQA64Z128rm: + case X86::VMOVDQU64Z128rm: + case X86::VMOVAPSZ256rm: + case X86::VMOVUPSZ256rm: + case X86::VMOVAPDZ256rm: + case X86::VMOVUPDZ256rm: + case X86::VMOVDQU8Z256rm: + case X86::VMOVDQU16Z256rm: + case X86::VMOVDQA32Z256rm: + case X86::VMOVDQU32Z256rm: + case X86::VMOVDQA64Z256rm: + case X86::VMOVDQU64Z256rm: + case X86::VMOVAPSZrm: + case X86::VMOVUPSZrm: + case X86::VMOVAPDZrm: + case X86::VMOVUPDZrm: + case X86::VMOVDQU8Zrm: + case X86::VMOVDQU16Zrm: + case X86::VMOVDQA32Zrm: + case X86::VMOVDQU32Zrm: + case X86::VMOVDQA64Zrm: + case X86::VMOVDQU64Zrm: + case X86::KMOVBkm: + case X86::KMOVWkm: + case X86::KMOVDkm: + case X86::KMOVQkm: break; } diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index af41de109e11..cb63f9108e29 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -517,20 +517,20 @@ define <64 x i8> @test16(i64 %x) { ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 ; KNL-NEXT: kmovw (%rsp), %k1 +; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2 ; KNL-NEXT: movl $1, %eax ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 ; KNL-NEXT: vpsllw $7, %ymm2, %ymm0 @@ -575,10 +575,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; KNL-NEXT: kmovw (%rsp), %k1 +; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; KNL-NEXT: xorl %eax, %eax @@ -591,10 +591,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; KNL-NEXT: movq %rbp, %rsp @@ -1319,10 +1319,10 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; ; SKX-LABEL: ktest_2: ; SKX: ## BB#0: -; SKX-NEXT: vmovups 64(%rdi), %zmm2 -; SKX-NEXT: vmovups (%rdi), %zmm3 -; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1 -; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2 +; SKX-NEXT: vmovups (%rdi), %zmm2 +; SKX-NEXT: vmovups 64(%rdi), %zmm3 +; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 +; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k0 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} @@ -1449,11 +1449,11 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) { ; KNL-LABEL: load_32i1: ; KNL: ## BB#0: ; KNL-NEXT: kmovw (%rdi), %k1 +; KNL-NEXT: kmovw 2(%rdi), %k2 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 -; KNL-NEXT: kmovw 2(%rdi), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdw %zmm1, %ymm1 ; KNL-NEXT: retq ; @@ -1471,18 +1471,18 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) { ; KNL-LABEL: load_64i1: ; KNL: ## BB#0: ; KNL-NEXT: kmovw (%rdi), %k1 +; KNL-NEXT: kmovw 2(%rdi), %k2 +; KNL-NEXT: kmovw 4(%rdi), %k3 +; KNL-NEXT: kmovw 6(%rdi), %k4 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: kmovw 2(%rdi), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; KNL-NEXT: kmovw 4(%rdi), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: kmovw 6(%rdi), %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k4} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; KNL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 672eeac0c7f2..018c5922a432 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -3738,11 +3738,11 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { ; AVX512-LABEL: load_sext_32i1_to_32i8: ; AVX512: # BB#0: # %entry ; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: kmovw 2(%rdi), %k2 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 ; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} ; AVX512-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512-NEXT: kmovw 2(%rdi), %k1 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z} ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX512-NEXT: retq