[InstCombine] relax masking requirement for truncated funnel/rotate match

I was investigating a seemingly unrelated improvement in demanded bits for
shift-left, but that caused regressions on these tests because we were able
to look through/eliminate the mask.

https://alive2.llvm.org/ce/z/Ztdr22

  define i8 @src(i32 %x, i32 %y, i32 %shift) {
    %and = and i32 %shift, 3
    %conv = and i32 %x, 255
    %shr = lshr i32 %conv, %and
    %sub = sub i32 8, %and
    %shl = shl i32 %y, %sub
    %or = or i32 %shr, %shl
    %conv2 = trunc i32 %or to i8
    ret i8 %conv2
  }

  define i8 @tgt(i32 %x, i32 %y, i32 %shift) {
    %x8 = trunc i32 %x to i8
    %y8 = trunc i32 %y to i8
    %shift8 = trunc i32 %shift to i8
    %and = and i8 %shift8, 3
    %conv2 = call i8 @llvm.fshr.i8(i8 %y8, i8 %x8, i8 %and)
    ret i8 %conv2
  }

  declare i8 @llvm.fshr.i8(i8, i8, i8)
parent 9fb946f1a8
commit abd7529625
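For context, source code along these lines produces the wide pattern proved above. This is a hypothetical C++ sketch for illustration only (the function name is not from the commit): an 8-bit funnel-right computed in 32-bit math, where the lshr operand is masked to its low 8 bits, the shl operand is not, and the result is truncated.

  #include <cstdint>

  // Mirrors @src above: only the right-shifted operand is masked; the high
  // bits of the left-shifted operand are discarded by the final truncation.
  uint8_t wideFshr8(uint32_t X, uint32_t Y, uint32_t Shift) {
    uint32_t Amt = Shift & 3;        // %and  = and i32 %shift, 3
    uint32_t Lo = (X & 255u) >> Amt; // %conv = and i32 %x, 255 ; %shr
    uint32_t Hi = Y << (8 - Amt);    // %sub  = sub i32 8, %and ; %shl
    return (uint8_t)(Lo | Hi);       // %or ; %conv2 = trunc i32 %or to i8
  }

Before this change, the fold required known-zero high bits on both shift operands, so the unmasked Y blocked the narrowing; afterwards only the right-shifted operand is checked.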
@@ -589,16 +589,16 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
   if (!ShAmt)
     return nullptr;
 
-  // The shifted value must have high zeros in the wide type. Typically, this
-  // will be a zext, but it could also be the result of an 'and' or 'shift'.
+  // The right-shifted value must have high zeros in the wide type (for example
+  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
+  // truncated, so those do not matter.
   unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
   APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
-  if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) ||
-      !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
+  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
     return nullptr;
 
   // We have an unnecessarily wide rotate!
-  // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt))
+  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
   // Narrow the inputs and convert to funnel shift intrinsic:
   // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
   Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
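The rewritten comment claims that the high bits of the left-shifted value are discarded by the trunc, so only the right-shifted value needs known-zero high bits. That claim can be sanity-checked by brute force over all narrow inputs; here is a minimal standalone C++ sketch (an assumed test harness, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Reference 8-bit funnel-right: concat(Hi:Lo) shifted right by Amt.
  static uint8_t fshr8(uint8_t Hi, uint8_t Lo, unsigned Amt) {
    Amt &= 7;
    return Amt == 0 ? Lo : (uint8_t)((Lo >> Amt) | (Hi << (8 - Amt)));
  }

  int main() {
    for (uint32_t X = 0; X < 256; ++X)
      for (uint32_t Y = 0; Y < 256; ++Y)
        for (uint32_t Shift = 0; Shift < 8; ++Shift) {
          uint32_t Amt = Shift & 3;
          // Deliberately dirty the high bits of the left-shifted (shl)
          // operand; the right-shifted (lshr) operand keeps high zeros
          // via the 255 mask.
          uint32_t DirtyY = Y | 0xABCD0000u;
          uint32_t Wide = ((X & 255u) >> Amt) | (DirtyY << (8 - Amt));
          assert((uint8_t)Wide == fshr8((uint8_t)DirtyY, (uint8_t)X, Amt));
        }
    return 0;
  }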
@@ -281,18 +281,15 @@ define i8 @fshr_commute_8bit(i32 %x, i32 %y, i32 %shift) {
   ret i8 %conv2
 }
 
-; TODO:
 ; The left-shifted value does not need to be masked at all.
 
 define i8 @fshr_commute_8bit_unmasked_shl(i32 %x, i32 %y, i32 %shift) {
 ; CHECK-LABEL: @fshr_commute_8bit_unmasked_shl(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHIFT:%.*]], 3
-; CHECK-NEXT:    [[CONVX:%.*]] = and i32 [[X:%.*]], 255
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONVX]], [[AND]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 8, [[AND]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[Y:%.*]] to i8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 ; CHECK-NEXT:    ret i8 [[CONV2]]
 ;
   %and = and i32 %shift, 3
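As a concrete spot check of the new expected output: even with the shl operand's high bits all set, the truncated wide expression matches the narrowed funnel shift. A hypothetical compile-time sketch (names are illustrative, not from the test file):

  #include <cstdint>

  constexpr uint8_t fshr8(uint8_t Hi, uint8_t Lo, unsigned Amt) {
    return Amt == 0 ? Lo : (uint8_t)((Lo >> Amt) | (Hi << (8 - Amt)));
  }

  // The wide pattern from @fshr_commute_8bit_unmasked_shl, %y unmasked.
  constexpr uint8_t wideFshr(uint32_t X, uint32_t Y, uint32_t Shift) {
    return (uint8_t)(((X & 255u) >> (Shift & 3)) | (Y << (8 - (Shift & 3))));
  }

  // Y = 0xFFFFFF01: high 24 bits set, yet both forms agree after truncation.
  static_assert(wideFshr(0x1F2u, 0xFFFFFF01u, 1) == fshr8(0x01, 0xF2, 1),
                "unmasked shl operand does not change the truncated result");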
@@ -456,18 +456,15 @@ define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
   ret i8 %conv2
 }
 
-; TODO:
 ; The left-shifted value does not need to be masked at all.
 
 define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
 ; CHECK-LABEL: @rotate_right_commute_8bit(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHIFT:%.*]], 3
-; CHECK-NEXT:    [[CONV:%.*]] = and i32 [[V:%.*]], 255
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[AND]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 8, [[AND]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[V]], [[SUB]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
+; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 ; CHECK-NEXT:    ret i8 [[CONV2]]
 ;
   %and = and i32 %shift, 3
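The rotate tests exercise the same fold with both funnel operands being the same value, which is why the expected output truncates %v twice and feeds both results to @llvm.fshr.i8: an 8-bit rotate-right is a funnel-right with equal inputs. A hypothetical one-liner stating that identity:

  #include <cstdint>

  // rotr8(V, N) == fshr8(V, V, N): a rotate is a funnel shift whose two
  // inputs are the same value.
  constexpr uint8_t rotr8(uint8_t V, unsigned N) {
    return (uint8_t)((V >> (N & 7)) | (V << ((8 - (N & 7)) & 7)));
  }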