forked from OSchip/llvm-project
AMDGPU: Use conditions directly in division expansion
The expansion was creating a select between true/false values from each condition and then comparing that result later. This produced more work for later combines, which can be avoided by just using the boolean values directly. The pattern was copied from the original DAG expansion, which has the same problem. This doesn't produce an observable change when using SelectionDAG, but since GlobalISel is missing these optimizations, the final code there was noticeably longer.
This commit is contained in:
parent
a5153dbc36
commit
6d4ebada79
|
@ -987,7 +987,6 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
|
|||
|
||||
ConstantInt *Zero = Builder.getInt32(0);
|
||||
ConstantInt *One = Builder.getInt32(1);
|
||||
ConstantInt *MinusOne = Builder.getInt32(~0);
|
||||
|
||||
Value *Sign = nullptr;
|
||||
if (IsSigned) {
|
||||
|
@ -1048,18 +1047,14 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
|
|||
// Remainder = Num - Num_S_Remainder
|
||||
Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder);
|
||||
|
||||
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
|
||||
Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den);
|
||||
Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero);
|
||||
// Remainder_GE_Den = Remainder >= Den;
|
||||
Value *Remainder_GE_Den = Builder.CreateICmpUGE(Remainder, Den);
|
||||
|
||||
// Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
|
||||
Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder);
|
||||
Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC,
|
||||
MinusOne, Zero);
|
||||
// Remainder_GE_Zero = Num >= Num_S_Remainder
|
||||
Value *Remainder_GE_Zero = Builder.CreateICmpUGE(Num, Num_S_Remainder);
|
||||
|
||||
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
|
||||
Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero);
|
||||
Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero);
|
||||
|
||||
Value *Res;
|
||||
if (IsDiv) {
|
||||
|
@ -1069,11 +1064,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
|
|||
// Quotient_S_One = Quotient - 1
|
||||
Value *Quotient_S_One = Builder.CreateSub(Quotient, One);
|
||||
|
||||
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
|
||||
Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One);
|
||||
// Div = (Tmp1 ? Quotient_A_One : Quotient)
|
||||
Value *Div = Builder.CreateSelect(Tmp1, Quotient_A_One, Quotient);
|
||||
|
||||
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
|
||||
Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One);
|
||||
// Div = (Remainder_GE_Zero ? Div : Quotient_S_One)
|
||||
Res = Builder.CreateSelect(Remainder_GE_Zero, Div, Quotient_S_One);
|
||||
} else {
|
||||
// Remainder_S_Den = Remainder - Den
|
||||
Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den);
|
||||
|
@ -1081,11 +1076,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
|
|||
// Remainder_A_Den = Remainder + Den
|
||||
Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den);
|
||||
|
||||
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
|
||||
Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den);
|
||||
// Rem = (Tmp1 ? Remainder_S_Den : Remainder)
|
||||
Value *Rem = Builder.CreateSelect(Tmp1, Remainder_S_Den, Remainder);
|
||||
|
||||
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
|
||||
Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den);
|
||||
// Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den)
|
||||
Res = Builder.CreateSelect(Remainder_GE_Zero, Rem, Remainder_A_Den);
|
||||
}
|
||||
|
||||
if (IsSigned) {
|
||||
|
|
|
@ -121,18 +121,15 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
|
|||
; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
|
||||
; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
|
||||
; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
|
||||
; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
|
||||
; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
|
||||
; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
|
||||
; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
|
||||
; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
|
||||
; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
|
||||
; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
|
||||
; IR-NEXT: ret i32 [[TMP45]]
|
||||
; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
|
||||
; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
|
||||
; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
|
||||
; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
|
||||
; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
|
||||
; IR-NEXT: ret i32 [[TMP42]]
|
||||
;
|
||||
; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32:
|
||||
; GCN: ; %bb.0:
|
||||
|
@ -219,18 +216,15 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
|
|||
; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]]
|
||||
; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]]
|
||||
; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0
|
||||
; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0
|
||||
; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]]
|
||||
; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0
|
||||
; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]]
|
||||
; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]]
|
||||
; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]]
|
||||
; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]]
|
||||
; IR-NEXT: ret i32 [[TMP45]]
|
||||
; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]]
|
||||
; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]]
|
||||
; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1
|
||||
; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]]
|
||||
; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]]
|
||||
; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]]
|
||||
; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]]
|
||||
; IR-NEXT: ret i32 [[TMP42]]
|
||||
;
|
||||
; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32:
|
||||
; GCN: ; %bb.0:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue