AMDGPU: Use conditions directly in division expansion

This was creating a select on true/false values, and then comparing
that later. This produced more work for later combines, which can be
avoided by just using the boolean values. This was copied from the
original DAG expansion, which also has the same problem. This doesn't
have an observable change using SelectionDAG, but since GlobalISel is
missing these optimizations, the final code was noticeably longer.
This commit is contained in:
Matt Arsenault 2020-02-11 15:13:42 -05:00 committed by Matt Arsenault
parent a5153dbc36
commit 6d4ebada79
3 changed files with 802 additions and 897 deletions

View File

@ -987,7 +987,6 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
ConstantInt *Zero = Builder.getInt32(0);
ConstantInt *One = Builder.getInt32(1);
ConstantInt *MinusOne = Builder.getInt32(~0);
Value *Sign = nullptr;
if (IsSigned) {
@ -1048,18 +1047,14 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
// Remainder = Num - Num_S_Remainder
Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder);
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den);
Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero);
// Remainder_GE_Den = Remainder >= Den;
Value *Remainder_GE_Den = Builder.CreateICmpUGE(Remainder, Den);
// Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder);
Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC,
MinusOne, Zero);
// Remainder_GE_Zero = Num >= Num_S_Remainder
Value *Remainder_GE_Zero = Builder.CreateICmpUGE(Num, Num_S_Remainder);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero);
Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero);
Value *Res;
if (IsDiv) {
@ -1069,11 +1064,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
// Quotient_S_One = Quotient - 1
Value *Quotient_S_One = Builder.CreateSub(Quotient, One);
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One);
// Div = (Tmp1 ? Quotient_A_One : Quotient)
Value *Div = Builder.CreateSelect(Tmp1, Quotient_A_One, Quotient);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One);
// Div = (Remainder_GE_Zero ? Div : Quotient_S_One)
Res = Builder.CreateSelect(Remainder_GE_Zero, Div, Quotient_S_One);
} else {
// Remainder_S_Den = Remainder - Den
Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den);
@ -1081,11 +1076,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
// Remainder_A_Den = Remainder + Den
Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den);
// Rem = (Tmp1 ? Remainder_S_Den : Remainder)
Value *Rem = Builder.CreateSelect(Tmp1, Remainder_S_Den, Remainder);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den);
// Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den)
Res = Builder.CreateSelect(Remainder_GE_Zero, Rem, Remainder_A_Den);
}
if (IsSigned) {

View File

@ -121,18 +121,15 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1
; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1
; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
; IR-NEXT: ret i32 [[TMP45]]
; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1
; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1
; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
; IR-NEXT: ret i32 [[TMP42]]
;
; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32:
; GCN: ; %bb.0:
@ -219,18 +216,15 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1
; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1
; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
; IR-NEXT: ret i32 [[TMP45]]
; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1
; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1
; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
; IR-NEXT: ret i32 [[TMP42]]
;
; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32:
; GCN: ; %bb.0:

File diff suppressed because it is too large Load Diff