forked from OSchip/llvm-project
[CGP] Also freeze ctlz/cttz operand when despeculating
D125887 changed the ctlz/cttz despeculation transform to insert a freeze for the introduced branch on zero. While this does fix the "branch on poison" issue, we may still get in trouble if we pick a different value for the branch and for the ctz argument (i.e. non-zero for the branch, but zero for the ctz). To avoid this, we should use the same frozen value in both positions.

This does cause a regression in RISCV codegen by introducing an additional sext. The DAG looks like this:

    t0: ch = EntryToken
    t2: i64,ch = CopyFromReg t0, Register:i64 %3
    t4: i64 = AssertSext t2, ValueType:ch:i32
    t23: i64 = freeze t4
    t9: ch = CopyToReg t0, Register:i64 %0, t23
    t16: ch = CopyToReg t0, Register:i64 %4, Constant:i64<32>
    t18: ch = TokenFactor t9, t16
    t25: i64 = sign_extend_inreg t23, ValueType:ch:i32
    t24: i64 = setcc t25, Constant:i64<0>, seteq:ch
    t28: i64 = and t24, Constant:i64<1>
    t19: ch = brcond t18, t28, BasicBlock:ch<cond.end 0x8311f68>
    t21: ch = br t19, BasicBlock:ch<cond.false 0x8311e80>

I don't see a really obvious way to improve this, as we can't push the freeze past the AssertSext (which may produce poison).

Differential Revision: https://reviews.llvm.org/D126638
This commit is contained in:
parent
6c372daa84
commit
c10921fa1a
|
@ -2056,7 +2056,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
|
|||
return false;
|
||||
|
||||
// Bail if the value is never zero.
|
||||
Value *Op = CountZeros->getOperand(0);
|
||||
Use &Op = CountZeros->getOperandUse(0);
|
||||
if (isKnownNonZero(Op, *DL))
|
||||
return false;
|
||||
|
||||
|
@ -2078,7 +2078,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
|
|||
// Replace the unconditional branch that was created by the first split with
|
||||
// a compare against zero and a conditional branch.
|
||||
Value *Zero = Constant::getNullValue(Ty);
|
||||
// Avoid introducing branch on poison.
|
||||
// Avoid introducing branch on poison. This also replaces the ctz operand.
|
||||
if (!isGuaranteedNotToBeUndefOrPoison(Op))
|
||||
Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
|
||||
Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
|
||||
|
|
|
@ -11,7 +11,8 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
|
|||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi sp, sp, -16
|
||||
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: beqz a0, .LBB0_2
|
||||
; RV64I-NEXT: sext.w a1, a0
|
||||
; RV64I-NEXT: beqz a1, .LBB0_2
|
||||
; RV64I-NEXT: # %bb.1: # %cond.false
|
||||
; RV64I-NEXT: srliw a1, a0, 1
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
|
@ -65,7 +66,8 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
|
|||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi sp, sp, -16
|
||||
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: beqz a0, .LBB1_2
|
||||
; RV64I-NEXT: sext.w a1, a0
|
||||
; RV64I-NEXT: beqz a1, .LBB1_2
|
||||
; RV64I-NEXT: # %bb.1: # %cond.false
|
||||
; RV64I-NEXT: srliw a1, a0, 1
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
|
@ -257,16 +259,15 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
|
|||
; RV64I-NEXT: srliw a0, a0, 1
|
||||
; RV64I-NEXT: beqz a0, .LBB4_2
|
||||
; RV64I-NEXT: # %bb.1: # %cond.false
|
||||
; RV64I-NEXT: srli a1, a0, 1
|
||||
; RV64I-NEXT: srliw a1, a0, 1
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: srli a1, a0, 2
|
||||
; RV64I-NEXT: srliw a1, a0, 2
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: srli a1, a0, 4
|
||||
; RV64I-NEXT: srliw a1, a0, 4
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: srli a1, a0, 8
|
||||
; RV64I-NEXT: srliw a1, a0, 8
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: slli a1, a0, 33
|
||||
; RV64I-NEXT: srli a1, a1, 49
|
||||
; RV64I-NEXT: srliw a1, a0, 16
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: not a0, a0
|
||||
; RV64I-NEXT: srli a1, a0, 1
|
||||
|
@ -372,7 +373,8 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
|
|||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi sp, sp, -16
|
||||
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: beqz a0, .LBB6_2
|
||||
; RV64I-NEXT: sext.w a1, a0
|
||||
; RV64I-NEXT: beqz a1, .LBB6_2
|
||||
; RV64I-NEXT: # %bb.1: # %cond.false
|
||||
; RV64I-NEXT: addiw a1, a0, -1
|
||||
; RV64I-NEXT: not a0, a0
|
||||
|
|
|
@ -17,7 +17,7 @@ define i64 @cttz(i64 %A) {
|
|||
; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0
|
||||
; SLOW-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; SLOW: cond.false:
|
||||
; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
|
||||
; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A_FR]], i1 true)
|
||||
; SLOW-NEXT: br label [[COND_END]]
|
||||
; SLOW: cond.end:
|
||||
; SLOW-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ]
|
||||
|
@ -34,7 +34,7 @@ define i64 @cttz(i64 %A) {
|
|||
; FAST_LZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0
|
||||
; FAST_LZ-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; FAST_LZ: cond.false:
|
||||
; FAST_LZ-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
|
||||
; FAST_LZ-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A_FR]], i1 true)
|
||||
; FAST_LZ-NEXT: br label [[COND_END]]
|
||||
; FAST_LZ: cond.end:
|
||||
; FAST_LZ-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ]
|
||||
|
@ -52,7 +52,7 @@ define i64 @ctlz(i64 %A) {
|
|||
; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0
|
||||
; SLOW-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; SLOW: cond.false:
|
||||
; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
|
||||
; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A_FR]], i1 true)
|
||||
; SLOW-NEXT: br label [[COND_END]]
|
||||
; SLOW: cond.end:
|
||||
; SLOW-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ]
|
||||
|
@ -64,7 +64,7 @@ define i64 @ctlz(i64 %A) {
|
|||
; FAST_TZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0
|
||||
; FAST_TZ-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; FAST_TZ: cond.false:
|
||||
; FAST_TZ-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
|
||||
; FAST_TZ-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A_FR]], i1 true)
|
||||
; FAST_TZ-NEXT: br label [[COND_END]]
|
||||
; FAST_TZ: cond.end:
|
||||
; FAST_TZ-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ]
|
||||
|
|
Loading…
Reference in New Issue