AMDGPU: Use conditions directly in division expansion

This was creating a select on true/false values, and then comparing that later. This produced more work for later combines, which can be avoided by just using the boolean values. This was copied from the original DAG expansion, which also has the same problem. This doesn't have a observable change using SelectionDAG, but since GlobalISel is missing these optimizations, the final code was noticeably longer.
2020-02-11 15:13:42 -05:00 · 2020-02-11 15:13:42 -05:00 · 6d4ebada79
parent a5153dbc36
commit 6d4ebada79
3 changed files with 802 additions and 897 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@ -987,7 +987,6 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,

  ConstantInt *Zero = Builder.getInt32(0);
  ConstantInt *One = Builder.getInt32(1);
-  ConstantInt *MinusOne = Builder.getInt32(~0);

  Value *Sign = nullptr;
  if (IsSigned) {
@ -1048,18 +1047,14 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
  // Remainder = Num - Num_S_Remainder
  Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder);

-  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-  Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den);
-  Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero);
+  // Remainder_GE_Den = Remainder >= Den;
+  Value *Remainder_GE_Den = Builder.CreateICmpUGE(Remainder, Den);

-  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
-  Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder);
-  Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC,
-                                                  MinusOne, Zero);
+  // Remainder_GE_Zero = Num >= Num_S_Remainder
+  Value *Remainder_GE_Zero = Builder.CreateICmpUGE(Num, Num_S_Remainder);

  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero);
-  Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero);

  Value *Res;
  if (IsDiv) {
@ -1069,11 +1064,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
    // Quotient_S_One = Quotient - 1
    Value *Quotient_S_One = Builder.CreateSub(Quotient, One);

-    // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-    Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One);
+    // Div = (Tmp1 ? Quotient_A_One : Quotient)
+    Value *Div = Builder.CreateSelect(Tmp1, Quotient_A_One, Quotient);

-    // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-    Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One);
+    // Div = (Remainder_GE_Zero ? Div : Quotient_S_One)
+    Res = Builder.CreateSelect(Remainder_GE_Zero, Div, Quotient_S_One);
  } else {
    // Remainder_S_Den = Remainder - Den
    Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den);
@ -1081,11 +1076,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
    // Remainder_A_Den = Remainder + Den
    Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den);

-    // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-    Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den);
+    // Rem = (Tmp1 ?  Remainder_S_Den : Remainder)
+    Value *Rem = Builder.CreateSelect(Tmp1, Remainder_S_Den, Remainder);

-    // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-    Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den);
+    // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den)
+    Res = Builder.CreateSelect(Remainder_GE_Zero, Rem, Remainder_A_Den);
  }

  if (IsSigned) {
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@ -121,18 +121,15 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
 ; IR-NEXT:    [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
 ; IR-NEXT:    [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
 ; IR-NEXT:    [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
-; IR-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
-; IR-NEXT:    [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
-; IR-NEXT:    [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
-; IR-NEXT:    [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
-; IR-NEXT:    [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
-; IR-NEXT:    [[TMP40:%.*]] = add i32 [[TMP31]], 1
-; IR-NEXT:    [[TMP41:%.*]] = sub i32 [[TMP31]], 1
-; IR-NEXT:    [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
-; IR-NEXT:    [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
-; IR-NEXT:    [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
-; IR-NEXT:    [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
-; IR-NEXT:    ret i32 [[TMP45]]
+; IR-NEXT:    [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
+; IR-NEXT:    [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
+; IR-NEXT:    [[TMP37:%.*]] = add i32 [[TMP31]], 1
+; IR-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP31]], 1
+; IR-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
+; IR-NEXT:    [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
+; IR-NEXT:    [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
+; IR-NEXT:    [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
+; IR-NEXT:    ret i32 [[TMP42]]
 ;
 ; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32:
 ; GCN:       ; %bb.0:
@ -219,18 +216,15 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
 ; IR-NEXT:    [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
 ; IR-NEXT:    [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
 ; IR-NEXT:    [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
-; IR-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
-; IR-NEXT:    [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
-; IR-NEXT:    [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
-; IR-NEXT:    [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
-; IR-NEXT:    [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
-; IR-NEXT:    [[TMP40:%.*]] = add i32 [[TMP31]], 1
-; IR-NEXT:    [[TMP41:%.*]] = sub i32 [[TMP31]], 1
-; IR-NEXT:    [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
-; IR-NEXT:    [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
-; IR-NEXT:    [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
-; IR-NEXT:    [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
-; IR-NEXT:    ret i32 [[TMP45]]
+; IR-NEXT:    [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
+; IR-NEXT:    [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
+; IR-NEXT:    [[TMP37:%.*]] = add i32 [[TMP31]], 1
+; IR-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP31]], 1
+; IR-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
+; IR-NEXT:    [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
+; IR-NEXT:    [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
+; IR-NEXT:    [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
+; IR-NEXT:    ret i32 [[TMP42]]
 ;
 ; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32:
 ; GCN:       ; %bb.0:
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll