From 3daa71ee005827b3aa3de2aa24417463ef6bccea Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 25 Jan 2020 20:20:37 +0000
Subject: [PATCH] [SelectionDAG] ComputeNumSignBits - add DemandedElts support
 for MIN/MAX ops

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  8 +--
 .../test/CodeGen/X86/known-signbits-vector.ll | 60 -------------------
 2 files changed, 4 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4d8977856d27..d44b92d1fa17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3702,18 +3702,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     }
 
     // Fallback - just get the minimum number of sign bits of the operands.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1)
       return 1; // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     return std::min(Tmp, Tmp2);
   }
   case ISD::UMIN:
   case ISD::UMAX:
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1)
       return 1; // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     return std::min(Tmp, Tmp2);
   case ISD::SADDO:
   case ISD::UADDO:
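The change above threads the caller's DemandedElts mask through the MIN/MAX fallback instead of recomputing over every vector lane. For illustration, a minimal standalone sketch of the underlying rule (plain C++20, not the LLVM API; the 32-bit lanes, the uint32_t lane mask, and the helper names are assumptions of this sketch): a per-lane min/max always returns one of its two operands, so each demanded lane of the result has at least as many sign bits as the worse of the two operands over those same lanes.

```cpp
// numSignBits() mirrors what APInt::getNumSignBits() computes: the number
// of leading bits equal to the sign bit, including the sign bit itself.
#include <algorithm>
#include <bit>
#include <cstdint>
#include <vector>

static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  return V < 0 ? std::countl_one(U) : std::countl_zero(U);
}

// Minimum sign-bit count across the demanded lanes only (assumes at most
// 32 lanes and at least one demanded lane).
static unsigned numSignBitsDemanded(const std::vector<int32_t> &Vec,
                                    uint32_t DemandedElts) {
  unsigned Min = 32;
  for (size_t I = 0, E = Vec.size(); I != E; ++I)
    if (DemandedElts & (1u << I))
      Min = std::min(Min, numSignBits(Vec[I]));
  return Min;
}

// The MIN/MAX fallback: each result lane is always one of the operand
// lanes, so the worse of the two operand counts is a safe lower bound.
static unsigned numSignBitsMinMax(const std::vector<int32_t> &A,
                                  const std::vector<int32_t> &B,
                                  uint32_t DemandedElts) {
  return std::min(numSignBitsDemanded(A, DemandedElts),
                  numSignBitsDemanded(B, DemandedElts));
}
```

Dropping lanes from DemandedElts can only raise the minimum, so a caller that demands a single lane (such as the splat shuffles in the tests below) may now prove strictly more sign bits than before.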
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index d5bc1b22d68d..1d0a631280fc 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -505,13 +505,6 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
@@ -531,13 +524,6 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
@@ -548,7 +534,6 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
   %1 = ashr <4 x i32> %a0,
@@ -578,13 +563,6 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
@@ -604,13 +582,6 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
@@ -621,7 +592,6 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
   %1 = ashr <4 x i32> %a0,
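In all four updated functions the min/max result is splatted (vpshufd / vpbroadcastd) before being shifted and masked, so only the broadcast lane is demanded; with DemandedElts now reaching the MIN/MAX node, enough sign bits are known on that lane for the per-lane ashr (the deleted vpsrad/vpblendw sequences and the vpsravd) to fold away, leaving only the vpand. A small self-contained check of that fold, with assumed input and mask values since the tests' IR constants are elided above:

```cpp
// If every bit kept by the mask lies inside a value's known sign-bit
// region, masking the arithmetically shifted value equals masking the
// unshifted one, so the shift is dead. (C++20 defines >> on a negative
// signed value as an arithmetic shift.)
#include <cassert>
#include <cstdint>

int main() {
  const int32_t Xs[] = {-59, 77};    // at least 25 sign bits each
  const uint32_t Mask = 0xFFFFFF80u; // keeps bits 31..7, all sign bits of Xs
  for (int32_t X : Xs)
    for (int K = 1; K <= 4; ++K)     // the deleted vpsrad amounts are 1..4
      assert((uint32_t(X >> K) & Mask) == (uint32_t(X) & Mask));
  return 0;
}
```

The equality holds whenever the mask only demands bits inside the operand's sign-bit region, which is presumably the demanded-bits condition that lets the DAG drop these shifts once ComputeNumSignBits returns a large enough count.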
@@ -651,13 +621,6 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
@@ -677,13 +640,6 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
@@ -694,7 +650,6 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
   %1 = ashr <4 x i32> %a0,
@@ -724,13 +679,6 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
@@ -750,13 +698,6 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
@@ -767,7 +708,6 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
   %1 = ashr <4 x i32> %a0,
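The four test functions are updated in the same mechanical way: only the now-redundant shift sequences are removed. As a usage note, CHECK lines in this style are the kind produced by LLVM's update_llc_test_checks.py utility, so after a codegen change like this one they are normally regenerated by running llvm/utils/update_llc_test_checks.py on the test file rather than edited by hand.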