[InstCombine] Simplify bswap -> shift

Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one
"active byte", i.e. all active bits are contained within the
boundaries of a single byte of x.

https://alive2.llvm.org/ce/z/nvbbU5
https://alive2.llvm.org/ce/z/KiiL3J

Reviewed By: spatel, craig.topper, lebedev.ri

Differential Revision: https://reviews.llvm.org/D117680
This commit is contained in:
Paweł Bylica 2022-01-21 00:56:38 +01:00 committed by Paweł Bylica
parent 37e0c58559
commit 1d7604fdce
No known key found for this signature in database
GPG Key ID: 7A0C037434FE77EF
2 changed files with 41 additions and 28 deletions

View File

@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
// bswap(x) -> shift(x) if x has exactly one "active byte"
if (Known.getBitWidth() - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
// -> lshr(x) if the "active byte" is in the high part of x
return BinaryOperator::CreateExactLShr(
IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
}
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getScalarSizeInBits() -

View File

@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
define i64 @bs_active_high8(i64 %0) {
; CHECK-LABEL: @bs_active_high8(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: ret i64 [[TMP3]]
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
; CHECK-NEXT: ret i64 [[TMP2]]
;
%2 = shl i64 %0, 56
%3 = call i64 @llvm.bswap.i64(i64 %2)
@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
define i32 @bs_active_high7(i32 %0) {
; CHECK-LABEL: @bs_active_high7(
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 254
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, -33554432 ; 0xfe000000
@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
define <2 x i64> @bs_active_high4(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high4(
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 60, i64 60>
@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_different(
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 56, i64 57>
@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
define i64 @bs_active_high8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high8_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
define i64 @bs_active_high7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high7_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
define i64 @bs_active_byte_6h(i64 %0) {
; CHECK-LABEL: @bs_active_byte_6h(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
; CHECK-NEXT: ret i64 [[TMP3]]
;
%2 = and i64 %0, 280375465082880 ; 0xff00'00000000
@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
define i32 @bs_active_byte_3h(i32 %0) {
; CHECK-LABEL: @bs_active_byte_3h(
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1536
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, 393216 ; 0x0006'0000
@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_byte_3h_v2(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%2 = and <2 x i32> %0, <i32 8388608, i32 65536> ; 0x0080'0000, 0x0001'0000
@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
define i16 @bs_active_low1(i16 %0) {
; CHECK-LABEL: @bs_active_low1(
; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 256
; CHECK-NEXT: ret i16 [[TMP3]]
;
%2 = lshr i16 %0, 15
@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
define <2 x i32> @bs_active_low8(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low8(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%2 = and <2 x i32> %0, <i32 255, i32 255>
%3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low_different(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%2 = and <2 x i32> %0, <i32 2, i32 128>
@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
define i64 @bs_active_low8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low8_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
define i64 @bs_active_low7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low7_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
define i64 @bs_active_byte_4l(i64 %0) {
; CHECK-LABEL: @bs_active_byte_4l(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
; CHECK-NEXT: ret i64 [[TMP3]]
;
%2 = and i64 %0, 1140850688 ; 0x44000000
@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
define i32 @bs_active_byte_2l(i32 %0) {
; CHECK-LABEL: @bs_active_byte_2l(
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, 65280 ; 0xff00
@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_byte_2l_v2(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = and <2 x i64> %0, <i64 256, i64 65280> ; 0x0100, 0xff00