forked from OSchip/llvm-project
[InstCombine] Simplify bswap -> shift
Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one "active byte", i.e. all bits of x that are not known to be zero lie within the boundaries of a single byte. Alive2 proofs: https://alive2.llvm.org/ce/z/nvbbU5 https://alive2.llvm.org/ce/z/KiiL3J Reviewed By: spatel, craig.topper, lebedev.ri Differential Revision: https://reviews.llvm.org/D117680
This commit is contained in:
parent
37e0c58559
commit
1d7604fdce
|
@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
|
|||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
KnownBits Known = computeKnownBits(IIOperand, 0, II);
|
||||
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
|
||||
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
|
||||
|
||||
// bswap(x) -> shift(x) if x has exactly one "active byte"
|
||||
if (Known.getBitWidth() - LZ - TZ == 8) {
|
||||
assert(LZ != TZ && "active byte cannot be in the middle");
|
||||
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
|
||||
return BinaryOperator::CreateNUWShl(
|
||||
IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
|
||||
// -> lshr(x) if the "active byte" is in the high part of x
|
||||
return BinaryOperator::CreateExactLShr(
|
||||
IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
|
||||
}
|
||||
|
||||
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
|
||||
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
|
||||
unsigned C = X->getType()->getScalarSizeInBits() -
|
||||
|
|
|
@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
|
|||
|
||||
define i64 @bs_active_high8(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_high8(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
|
||||
; CHECK-NEXT: ret i64 [[TMP2]]
|
||||
;
|
||||
%2 = shl i64 %0, 56
|
||||
%3 = call i64 @llvm.bswap.i64(i64 %2)
|
||||
|
@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
|
|||
|
||||
define i32 @bs_active_high7(i32 %0) {
|
||||
; CHECK-LABEL: @bs_active_high7(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 254
|
||||
; CHECK-NEXT: ret i32 [[TMP3]]
|
||||
;
|
||||
%2 = and i32 %0, -33554432 ; 0xfe000000
|
||||
|
@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
|
|||
|
||||
define <2 x i64> @bs_active_high4(<2 x i64> %0) {
|
||||
; CHECK-LABEL: @bs_active_high4(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
||||
;
|
||||
%2 = shl <2 x i64> %0, <i64 60, i64 60>
|
||||
|
@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
|
|||
define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
|
||||
; CHECK-LABEL: @bs_active_high_different(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
||||
;
|
||||
%2 = shl <2 x i64> %0, <i64 56, i64 57>
|
||||
|
@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
|
|||
define i64 @bs_active_high8_multiuse(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_high8_multiuse(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 255
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: ret i64 [[TMP4]]
|
||||
;
|
||||
|
@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
|
|||
define i64 @bs_active_high7_multiuse(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_high7_multiuse(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: ret i64 [[TMP4]]
|
||||
;
|
||||
|
@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
|
|||
|
||||
define i64 @bs_active_byte_6h(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_6h(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%2 = and i64 %0, 280375465082880 ; 0xff00'00000000
|
||||
|
@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
|
|||
|
||||
define i32 @bs_active_byte_3h(i32 %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_3h(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1536
|
||||
; CHECK-NEXT: ret i32 [[TMP3]]
|
||||
;
|
||||
%2 = and i32 %0, 393216 ; 0x0006'0000
|
||||
|
@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
|
|||
define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_3h_v2(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
|
||||
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
|
||||
;
|
||||
%2 = and <2 x i32> %0, <i32 8388608, i32 65536> ; 0x0080'0000, 0x0001'0000
|
||||
|
@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
|
|||
|
||||
define i16 @bs_active_low1(i16 %0) {
|
||||
; CHECK-LABEL: @bs_active_low1(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 256
|
||||
; CHECK-NEXT: ret i16 [[TMP3]]
|
||||
;
|
||||
%2 = lshr i16 %0, 15
|
||||
|
@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
|
|||
|
||||
define <2 x i32> @bs_active_low8(<2 x i32> %0) {
|
||||
; CHECK-LABEL: @bs_active_low8(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
|
||||
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
|
||||
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
|
||||
;
|
||||
%2 = and <2 x i32> %0, <i32 255, i32 255>
|
||||
%3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
|
||||
|
@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
|
|||
define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
|
||||
; CHECK-LABEL: @bs_active_low_different(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
|
||||
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
|
||||
;
|
||||
%2 = and <2 x i32> %0, <i32 2, i32 128>
|
||||
|
@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
|
|||
define i64 @bs_active_low8_multiuse(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_low8_multiuse(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: ret i64 [[TMP4]]
|
||||
;
|
||||
|
@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
|
|||
define i64 @bs_active_low7_multiuse(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_low7_multiuse(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: ret i64 [[TMP4]]
|
||||
;
|
||||
|
@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
|
|||
|
||||
define i64 @bs_active_byte_4l(i64 %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_4l(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%2 = and i64 %0, 1140850688 ; 0x44000000
|
||||
|
@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
|
|||
|
||||
define i32 @bs_active_byte_2l(i32 %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_2l(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
|
||||
; CHECK-NEXT: ret i32 [[TMP3]]
|
||||
;
|
||||
%2 = and i32 %0, 65280 ; 0xff00
|
||||
|
@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
|
|||
define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
|
||||
; CHECK-LABEL: @bs_active_byte_2l_v2(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
||||
;
|
||||
%2 = and <2 x i64> %0, <i64 256, i64 65280> ; 0x0100, 0xff00
|
||||
|
|
Loading…
Reference in New Issue