forked from OSchip/llvm-project
[InstCombine] freeze operand in urem expansion
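As discussed in issue #37809, this transform is not safe if the input is an undefined value: the expansion increases the number of uses of the operand, and each use of an undef value may take a different value. Freezing the operand makes no difference in codegen on the basic examples, but this could lead to regressions. We may need to improve freeze analysis or lowering if that happens.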
This commit is contained in:
parent
8cb7a873ab
commit
d428f09b2c
|
@ -1493,11 +1493,13 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
|
|||
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
|
||||
}
|
||||
|
||||
// X urem C -> X < C ? X : X - C, where C >= signbit.
|
||||
// Op0 urem C -> Op0 < C ? Op0 : Op0 - C, where C >= signbit.
|
||||
// Op0 must be frozen because we are increasing its number of uses.
|
||||
if (match(Op1, m_Negative())) {
|
||||
Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
|
||||
Value *Sub = Builder.CreateSub(Op0, Op1);
|
||||
return SelectInst::Create(Cmp, Op0, Sub);
|
||||
Value *F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
|
||||
Value *Cmp = Builder.CreateICmpULT(F0, Op1);
|
||||
Value *Sub = Builder.CreateSub(F0, Op1);
|
||||
return SelectInst::Create(Cmp, F0, Sub);
|
||||
}
|
||||
|
||||
// If the divisor is a sext of a boolean, then the divisor must be max
|
||||
|
|
|
@ -40,9 +40,10 @@ define i64 @rem_unsigned(i64 %x1, i64 %y2) {
|
|||
|
||||
define i8 @big_divisor(i8 %x) {
|
||||
; CHECK-LABEL: @big_divisor(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -127
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X]], 127
|
||||
; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[TMP2]]
|
||||
; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X_FR]], -127
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X_FR]], 127
|
||||
; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 [[X_FR]], i8 [[TMP2]]
|
||||
; CHECK-NEXT: ret i8 [[REM]]
|
||||
;
|
||||
%rem = urem i8 %x, 129
|
||||
|
@ -51,8 +52,9 @@ define i8 @big_divisor(i8 %x) {
|
|||
|
||||
define i5 @biggest_divisor(i5 %x) {
|
||||
; CHECK-LABEL: @biggest_divisor(
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X]]
|
||||
; CHECK-NEXT: [[X_FR:%.*]] = freeze i5 [[X:%.*]]
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X_FR]], -1
|
||||
; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X_FR]]
|
||||
; CHECK-NEXT: ret i5 [[REM]]
|
||||
;
|
||||
%rem = urem i5 %x, -1
|
||||
|
@ -83,9 +85,10 @@ define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) {
|
|||
|
||||
define <2 x i4> @big_divisor_vec(<2 x i4> %x) {
|
||||
; CHECK-LABEL: @big_divisor_vec(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X:%.*]], <i4 -3, i4 -3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X]], <i4 3, i4 3>
|
||||
; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X]], <2 x i4> [[TMP2]]
|
||||
; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i4> [[X:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], <i4 -3, i4 -3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], <i4 3, i4 3>
|
||||
; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X_FR]], <2 x i4> [[TMP2]]
|
||||
; CHECK-NEXT: ret <2 x i4> [[REM]]
|
||||
;
|
||||
%rem = urem <2 x i4> %x, <i4 13, i4 13>
|
||||
|
|
|
@ -49,9 +49,10 @@ define <4 x i32> @test_v4i32_one_undef(<4 x i32> %a0) {
|
|||
|
||||
define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
|
||||
; CHECK-LABEL: @test_v4i32_negconstsplat(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -3, i32 -3, i32 -3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 3, i32 3, i32 3>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
|
||||
; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], <i32 -3, i32 -3, i32 -3, i32 -3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], <i32 3, i32 3, i32 3, i32 3>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
|
||||
;
|
||||
%1 = urem <4 x i32> %a0, <i32 -3, i32 -3, i32 -3, i32 -3>
|
||||
|
@ -60,9 +61,10 @@ define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
|
|||
|
||||
define <4 x i32> @test_v4i32_negconst(<4 x i32> %a0) {
|
||||
; CHECK-LABEL: @test_v4i32_negconst(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -5, i32 -7, i32 -9>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 5, i32 7, i32 9>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
|
||||
; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], <i32 -3, i32 -5, i32 -7, i32 -9>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], <i32 3, i32 5, i32 7, i32 9>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
|
||||
;
|
||||
%1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 -9>
|
||||
|
|
Loading…
Reference in New Issue