[InstCombine] freeze operand in urem expansion

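As discussed in issue #37809, this transform is not safe
if the input is an undefined value: the expansion uses the
operand more than once, and each use of an undef value may
observe a different value.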

There is no difference in codegen on the basic examples,
but the added freeze could lead to regressions elsewhere.
We may need to improve freeze analysis or lowering if that
happens.
This commit is contained in:
Sanjay Patel 2022-05-11 12:09:47 -04:00
parent 8cb7a873ab
commit d428f09b2c
3 changed files with 25 additions and 18 deletions

View File

@ -1493,11 +1493,13 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
// X urem C -> X < C ? X : X - C, where C >= signbit.
// Op0 urem C -> Op0 < C ? Op0 : Op0 - C, where C >= signbit.
// Op0 must be frozen because we are increasing its number of uses.
if (match(Op1, m_Negative())) {
Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
Value *Sub = Builder.CreateSub(Op0, Op1);
return SelectInst::Create(Cmp, Op0, Sub);
Value *F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
Value *Cmp = Builder.CreateICmpULT(F0, Op1);
Value *Sub = Builder.CreateSub(F0, Op1);
return SelectInst::Create(Cmp, F0, Sub);
}
// If the divisor is a sext of a boolean, then the divisor must be max

View File

@ -40,9 +40,10 @@ define i64 @rem_unsigned(i64 %x1, i64 %y2) {
define i8 @big_divisor(i8 %x) {
; CHECK-LABEL: @big_divisor(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -127
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X]], 127
; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[TMP2]]
; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X_FR]], -127
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X_FR]], 127
; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 [[X_FR]], i8 [[TMP2]]
; CHECK-NEXT: ret i8 [[REM]]
;
%rem = urem i8 %x, 129
@ -51,8 +52,9 @@ define i8 @big_divisor(i8 %x) {
define i5 @biggest_divisor(i5 %x) {
; CHECK-LABEL: @biggest_divisor(
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X:%.*]], -1
; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X]]
; CHECK-NEXT: [[X_FR:%.*]] = freeze i5 [[X:%.*]]
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i5 [[X_FR]], -1
; CHECK-NEXT: [[REM:%.*]] = select i1 [[DOTNOT]], i5 0, i5 [[X_FR]]
; CHECK-NEXT: ret i5 [[REM]]
;
%rem = urem i5 %x, -1
@ -83,9 +85,10 @@ define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) {
define <2 x i4> @big_divisor_vec(<2 x i4> %x) {
; CHECK-LABEL: @big_divisor_vec(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X:%.*]], <i4 -3, i4 -3>
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X]], <i4 3, i4 3>
; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X]], <2 x i4> [[TMP2]]
; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i4> [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], <i4 -3, i4 -3>
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], <i4 3, i4 3>
; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X_FR]], <2 x i4> [[TMP2]]
; CHECK-NEXT: ret <2 x i4> [[REM]]
;
%rem = urem <2 x i4> %x, <i4 13, i4 13>

View File

@ -49,9 +49,10 @@ define <4 x i32> @test_v4i32_one_undef(<4 x i32> %a0) {
define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
; CHECK-LABEL: @test_v4i32_negconstsplat(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -3, i32 -3, i32 -3>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], <i32 -3, i32 -3, i32 -3, i32 -3>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%1 = urem <4 x i32> %a0, <i32 -3, i32 -3, i32 -3, i32 -3>
@ -60,9 +61,10 @@ define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
define <4 x i32> @test_v4i32_negconst(<4 x i32> %a0) {
; CHECK-LABEL: @test_v4i32_negconst(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -5, i32 -7, i32 -9>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 5, i32 7, i32 9>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], <i32 -3, i32 -5, i32 -7, i32 -9>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], <i32 3, i32 5, i32 7, i32 9>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 -9>