diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 737bb1f9c9fb..1aa8df29af3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4059,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD, IsExpanding); - if (AddToChain) { - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); - } + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); } diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll index 4d54fb715230..e9d0161dd94b 100644 --- a/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll +++ b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll @@ -7,13 +7,12 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k1 ; AVX512BW-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} -; AVX512BW-NEXT: kshiftrd $16, %k1, %k2 -; AVX512BW-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2} +; AVX512BW-NEXT: kshiftrw $8, %k1, %k2 +; AVX512BW-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2} +; AVX512BW-NEXT: kshiftrd $16, %k1, %k1 +; AVX512BW-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1} ; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1} -; AVX512BW-NEXT: kshiftrw $8, %k2, %k1 ; AVX512BW-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1} -; AVX512BW-NEXT: vmovapd %zmm5, %zmm2 ; AVX512BW-NEXT: retq %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0) ret <32 x double> %res @@ -25,13 +24,12 @@ define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k1 ; AVX512BW-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1} -; AVX512BW-NEXT: kshiftrd $16, %k1, %k2 -; AVX512BW-NEXT: vpblendmq 128(%rdi), %zmm3, %zmm5 {%k2} +; AVX512BW-NEXT: kshiftrw $8, %k1, %k2 +; AVX512BW-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k2} +; AVX512BW-NEXT: kshiftrd $16, %k1, %k1 +; AVX512BW-NEXT: vpblendmq 128(%rdi), %zmm3, %zmm2 {%k1} ; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1} -; AVX512BW-NEXT: kshiftrw $8, %k2, %k1 ; AVX512BW-NEXT: vpblendmq 192(%rdi), %zmm4, %zmm3 {%k1} -; AVX512BW-NEXT: vmovdqa64 %zmm5, %zmm2 ; AVX512BW-NEXT: retq %res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0) ret <32 x i64> %res diff --git a/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll b/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll index 275884c6de06..f199cb097aa4 100644 --- a/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll +++ b/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll @@ -94,10 +94,10 @@ declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16 define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) { ; AVX512-LABEL: test23: ; AVX512: ## %bb.0: -; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k2 -; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k2} {z} -; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k2 +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k2} {z} +; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z} ; AVX512-NEXT: retq %mask = icmp eq <16 x i32*> %trigger, zeroinitializer %res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer) @@ -234,19 +234,19 @@ declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i3 define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) { ; AVX512F-LABEL: test_load_32f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm5 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5 +; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5 ; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5 ; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 -; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2} -; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2} +; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1} ; AVX512F-NEXT: kshiftrw $8, %k2, %k2 -; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2} +; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2} ; AVX512F-NEXT: kshiftrw $8, %k1, %k1 -; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1} +; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1} ; AVX512F-NEXT: vmovapd %zmm5, %zmm2 ; AVX512F-NEXT: retq ; @@ -255,13 +255,12 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 ; SKX-NEXT: vpmovb2m %ymm0, %k1 ; SKX-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} -; SKX-NEXT: kshiftrd $16, %k1, %k2 -; SKX-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2} +; SKX-NEXT: kshiftrw $8, %k1, %k2 +; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2} +; SKX-NEXT: kshiftrd $16, %k1, %k1 +; SKX-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1} ; SKX-NEXT: kshiftrw $8, %k1, %k1 -; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1} -; SKX-NEXT: kshiftrw $8, %k2, %k1 ; SKX-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1} -; SKX-NEXT: vmovapd %zmm5, %zmm2 ; SKX-NEXT: retq %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0) ret <32 x double> %res diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index aa6ae0964453..812d9f50fe3c 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -976,8 +976,8 @@ define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) { define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) { ; AVX-LABEL: mload_constmask_v8f64: ; AVX: ## %bb.0: -; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7] ; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6,7] +; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7] ; AVX-NEXT: retq ; ; AVX512F-LABEL: mload_constmask_v8f64: