[DAG] Fold (srl (shl x, c1), c2) -> and(shl/srl(x, c3), m)

Similar to the existing (shl (srl x, c1), c2) fold

Part of the work to fix the regressions in D77804

Differential Revision: https://reviews.llvm.org/D125836
This commit is contained in:
Simon Pilgrim 2022-06-20 08:37:25 +01:00
parent 26041e1700
commit e4a124dda5
12 changed files with 226 additions and 150 deletions
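
For context (not part of the commit): a minimal C++ sketch of what the new fold means for i32, using the same shift amounts as the shift-mask.ll tests updated below. The function names are hypothetical; the masks 0x7FFFFFF and 0x1FFFFFFC are the constants those tests now check for.

#include <cstdint>

// Illustration only: srl(shl(x, c1), c2) rewritten as a single shift plus an AND.
uint32_t before_small_shl(uint32_t x) { return (x << 3) >> 5; }          // c1 = 3 < c2 = 5
uint32_t after_small_shl(uint32_t x)  { return (x >> 2) & 0x7FFFFFFu; }  // srl + and
uint32_t before_big_shl(uint32_t x)   { return (x << 5) >> 3; }          // c1 = 5 > c2 = 3
uint32_t after_big_shl(uint32_t x)    { return (x << 2) & 0x1FFFFFFCu; } // shl + and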


@@ -9419,15 +9419,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
// fold (srl (shl x, c), c) -> (and x, cst2)
// TODO - (srl (shl x, c1), c2).
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
SDValue Mask =
DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
// fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
// (and (srl x, (sub c2, c1)), MASK)
if (N0.getOpcode() == ISD::SHL &&
(N0.getOperand(1) == N1 || N0->hasOneUse()) &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
const APInt &RHSC = RHS->getAPIntValue();
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)

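As a reading aid (not LLVM code), the two branches above can be modelled with plain scalar arithmetic. The helper below is hypothetical and assumes both shift amounts are below the bit width, matching the MatchShiftAmount guard.

#include <cstdint>

// Hypothetical scalar model of the visitSRL fold above; not part of the patch.
// Requires c1 < bits and c2 < bits, as enforced by MatchShiftAmount.
uint64_t foldShlSrl(uint64_t x, unsigned c1, unsigned c2, unsigned bits) {
  uint64_t allOnes = (bits == 64) ? ~0ULL : ((1ULL << bits) - 1);
  x &= allOnes;
  if (c2 <= c1) {
    // First branch: shift left by the difference, mask = (allOnes >> c1) << (c1 - c2).
    uint64_t mask = (allOnes >> c1) << (c1 - c2);
    return (x << (c1 - c2)) & mask;
  }
  // Second branch: shift right by the difference, mask = allOnes >> c2.
  return (x >> (c2 - c1)) & (allOnes >> c2);
}

With bits = 8, c1 = 3, c2 = 5 this reproduces the shrb $2 / andb $7 sequence checked in the shift-mask.ll diff further down.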

@@ -13411,7 +13411,18 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
// Don't allow multiuse shift folding with the same shift amount.
return N->getOperand(0)->hasOneUse();
if (!N->getOperand(0)->hasOneUse())
return false;
// Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

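For illustration (constants chosen arbitrarily, not from the patch): when the shl amount is smaller than the srl amount, the shift pair is just an unsigned bitfield extract, which is the case the new C1 >= C2 check deliberately leaves alone.

#include <cstdint>

// Illustration only: srl(shl(x, 3), 5) extracts bits [2, 29) of x. AArch64 can
// select this as a single UBFX, so rewriting it into srl + and could cost an
// extra instruction.
uint32_t extract_bits(uint32_t x) { return (x << 3) >> 5; }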

@@ -5844,6 +5844,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
// TODO: Should we always create i64 masks? Or only folded immediates?
EVT VT = N->getValueType(0);
if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
(Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {

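Rough sketch (not from the patch) of the two scalar forms the X86 hook chooses between. The i64 case is relevant to the new TODO because the mask has to be materialised as a 64-bit immediate, visible as a movabsq in the updated shift-mask.ll checks below.

#include <cstdint>

// Illustration only: srl(shl(x, 5), 3) on i64. Subtargets reporting fast scalar
// shift masks take the shl + and form; others keep the two shifts.
uint64_t two_shifts(uint64_t x)      { return (x << 5) >> 3; }
uint64_t shift_plus_mask(uint64_t x) { return (x << 2) & 0x1FFFFFFFFFFFFFFCull; }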

@@ -129,11 +129,10 @@ define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xfffc
; CHECK-NEXT: lsl w9, w8, #14
; CHECK-NEXT: lsl w8, w8, #17
; CHECK-NEXT: and w10, w9, #0x1fff0000
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: csinv w8, w8, wzr, eq
; CHECK-NEXT: lsl w9, w8, #17
; CHECK-NEXT: lsl w8, w8, #14
; CHECK-NEXT: cmp w8, w9, lsr #3
; CHECK-NEXT: csinv w8, w9, wzr, eq
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
%x2 = lshr i16 %x, 2


@@ -2852,7 +2852,7 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
; GFX7-NEXT: v_bfe_i32 v8, v2, 8, 4
; GFX7-NEXT: v_bfe_i32 v9, v2, 4, 4
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 4
; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v6
@@ -2861,67 +2861,67 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_ashrrev_i32_e32 v11, 28, v0
; GFX7-NEXT: v_bfe_i32 v12, v0, 24, 4
; GFX7-NEXT: v_bfe_i32 v13, v0, 20, 4
; GFX7-NEXT: v_bfe_i32 v14, v0, 16, 4
; GFX7-NEXT: v_bfe_i32 v15, v0, 12, 4
; GFX7-NEXT: v_bfe_i32 v16, v0, 8, 4
; GFX7-NEXT: v_bfe_i32 v17, v0, 4, 4
; GFX7-NEXT: v_ashrrev_i32_e32 v10, 28, v0
; GFX7-NEXT: v_bfe_i32 v11, v0, 24, 4
; GFX7-NEXT: v_bfe_i32 v12, v0, 20, 4
; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4
; GFX7-NEXT: v_bfe_i32 v14, v0, 12, 4
; GFX7-NEXT: v_bfe_i32 v15, v0, 8, 4
; GFX7-NEXT: v_bfe_i32 v16, v0, 4, 4
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 4
; GFX7-NEXT: v_or_b32_e32 v4, v4, v10
; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
; GFX7-NEXT: v_or_b32_e32 v6, v8, v7
; GFX7-NEXT: v_or_b32_e32 v3, v4, v3
; GFX7-NEXT: v_or_b32_e32 v4, v6, v5
; GFX7-NEXT: v_or_b32_e32 v5, v8, v7
; GFX7-NEXT: v_or_b32_e32 v2, v2, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v11
; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v13
; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v15
; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v16
; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v17
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v10
; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v11
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v12
; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v14
; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v15
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v16
; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-NEXT: v_or_b32_e32 v7, v8, v7
; GFX7-NEXT: v_or_b32_e32 v8, v10, v9
; GFX7-NEXT: v_or_b32_e32 v9, v13, v12
; GFX7-NEXT: v_or_b32_e32 v0, v0, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_or_b32_e32 v6, v7, v6
; GFX7-NEXT: v_or_b32_e32 v7, v9, v8
; GFX7-NEXT: v_or_b32_e32 v8, v11, v10
; GFX7-NEXT: v_or_b32_e32 v0, v0, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_or_b32_e32 v4, v5, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v6
; GFX7-NEXT: v_or_b32_e32 v0, v0, v7
; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
; GFX7-NEXT: v_or_b32_e32 v4, v4, v13
; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v2
; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v0
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX7-NEXT: v_bfe_u32 v8, v2, 8, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-NEXT: v_bfe_u32 v9, v2, 8, 8
; GFX7-NEXT: v_bfe_u32 v14, v0, 8, 8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v7, v13, v1
; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v13, v1
; GFX7-NEXT: v_or_b32_e32 v5, v7, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v14, v1
; GFX7-NEXT: v_mad_u32_u24 v1, v9, v14, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v4
; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v4
; GFX7-NEXT: v_and_b32_e32 v15, 0xff, v5
; GFX7-NEXT: v_mad_u32_u24 v0, v6, v12, v0
; GFX7-NEXT: v_bfe_u32 v10, v4, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v7, v12, v0
; GFX7-NEXT: v_bfe_u32 v11, v4, 8, 8
; GFX7-NEXT: v_bfe_u32 v16, v5, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v9, v15, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v10, v15, v0
; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8
; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v10, v16, v0
; GFX7-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-NEXT: v_mad_u32_u24 v0, v11, v16, v0
; GFX7-NEXT: v_bfe_u32 v3, v3, 8, 8
; GFX7-NEXT: v_bfe_u32 v6, v6, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v4, v5, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v11, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v6, v0
; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;


@@ -683,8 +683,8 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; SI-NEXT: v_lshlrev_b32_e32 v0, 30, v0
; SI-NEXT: v_and_b32_e32 v0, 2.0, v0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -702,8 +702,8 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, 30, v0
; VI-NEXT: v_and_b32_e32 v0, 2.0, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
%x = load i32, i32 addrspace(1)* %in, align 4


@@ -31,23 +31,18 @@ define i32 @test2(i32* %m_degree) ssp {
; CHECK-LABEL: test2:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: movs r1, #7
; CHECK-NEXT: lsls r1, r1, #29
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: bics r2, r1
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: lsls r0, r1, #3
; CHECK-NEXT: lsrs r2, r0, #3
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: subs r2, r1, #1
; CHECK-NEXT: sbcs r1, r2
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: bne .LBB1_2
; CHECK-NEXT: beq .LBB1_2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: lsls r0, r0, #3
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: mvns r0, r4
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: bl _Znam
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: pop {r4, pc}


@@ -9,9 +9,8 @@ define void @fn1() {
; CHECK-LABEL: fn1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $1, c(%rip)
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: cmpl $0, c(%rip)
; CHECK-NEXT: sete %al
; CHECK-NEXT: movl %eax, d(%rip)
; CHECK-NEXT: retq
%t0 = load i32, i32* @c, align 4


@@ -195,10 +195,9 @@ define i32 @and_signbit_lshr(i32 %x, i32* %dst) {
;
; X86-LABEL: and_signbit_lshr:
; X86: # %bb.0:
; X86-NEXT: movzwl 6(%esp), %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: movl 8(%esp), %ecx
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movzwl 6(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
%t0 = and i32 %x, 4294901760 ; 0xFFFF0000


@@ -147,13 +147,19 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind {
; Result would undershift
define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
; CHECK-LABEL: no_extract_shl:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllq $11, %ymm0, %ymm1
; CHECK-NEXT: vpsllq $24, %ymm0, %ymm0
; CHECK-NEXT: vpsrlq $50, %ymm1, %ymm1
; CHECK-NEXT: vpor %ymm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
; X86-LABEL: no_extract_shl:
; X86: # %bb.0:
; X86-NEXT: vpsllq $24, %ymm0, %ymm1
; X86-NEXT: vpsrlq $39, %ymm0, %ymm0
; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: no_extract_shl:
; X64: # %bb.0:
; X64-NEXT: vpsllq $24, %ymm0, %ymm1
; X64-NEXT: vpsrlq $39, %ymm0, %ymm0
; X64-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; X64-NEXT: retq
%lhs_mul = shl <4 x i64> %i, <i64 11, i64 11, i64 11, i64 11>
%rhs_mul = shl <4 x i64> %i, <i64 24, i64 24, i64 24, i64 24>
%lhs_shift = lshr <4 x i64> %lhs_mul, <i64 50, i64 50, i64 50, i64 50>


@@ -135,21 +135,21 @@ define i64 @no_extract_shl(i64 %i) nounwind {
; X86-LABEL: no_extract_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shldl $10, %ecx, %edx
; X86-NEXT: shll $10, %ecx
; X86-NEXT: shrl $25, %eax
; X86-NEXT: shrl $20, %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: no_extract_shl:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: shlq $10, %rdi
; X64-NEXT: shrq $57, %rax
; X64-NEXT: shlq $10, %rax
; X64-NEXT: shrq $52, %rdi
; X64-NEXT: andl $127, %edi
; X64-NEXT: orq %rdi, %rax
; X64-NEXT: retq
%lhs_mul = shl i64 %i, 5


@@ -337,17 +337,25 @@ define i8 @test_i8_lshr_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_lshr_lshr_1:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb $3, %al
; X86-NEXT: shrb $5, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: andb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8_lshr_lshr_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,8), %eax
; X64-NEXT: shrb $5, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i8_lshr_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movl %edi, %eax
; X64-MASK-NEXT: shrb $2, %al
; X64-MASK-NEXT: andb $7, %al
; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i8_lshr_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
; X64-SHIFT-NEXT: shrb $5, %al
; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
; X64-SHIFT-NEXT: retq
%1 = shl i8 %a0, 3
%2 = lshr i8 %1, 5
ret i8 %2
@@ -357,17 +365,25 @@ define i8 @test_i8_lshr_lshr_2(i8 %a0) {
; X86-LABEL: test_i8_lshr_lshr_2:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb $5, %al
; X86-NEXT: shrb $3, %al
; X86-NEXT: shlb $2, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8_lshr_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shlb $5, %al
; X64-NEXT: shrb $3, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i8_lshr_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
; X64-MASK-NEXT: leal (,%rdi,4), %eax
; X64-MASK-NEXT: andb $28, %al
; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i8_lshr_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movl %edi, %eax
; X64-SHIFT-NEXT: shlb $5, %al
; X64-SHIFT-NEXT: shrb $3, %al
; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
; X64-SHIFT-NEXT: retq
%1 = shl i8 %a0, 5
%2 = lshr i8 %1, 3
ret i8 %2
@@ -476,16 +492,23 @@ define i32 @test_i32_lshr_lshr_1(i32 %a0) {
; X86-LABEL: test_i32_lshr_lshr_1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $3, %eax
; X86-NEXT: shrl $5, %eax
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
; X86-NEXT: retl
;
; X64-LABEL: test_i32_lshr_lshr_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,8), %eax
; X64-NEXT: shrl $5, %eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i32_lshr_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: movl %edi, %eax
; X64-MASK-NEXT: shrl $2, %eax
; X64-MASK-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i32_lshr_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
; X64-SHIFT-NEXT: shrl $5, %eax
; X64-SHIFT-NEXT: retq
%1 = shl i32 %a0, 3
%2 = lshr i32 %1, 5
ret i32 %2
@@ -495,16 +518,23 @@ define i32 @test_i32_lshr_lshr_2(i32 %a0) {
; X86-LABEL: test_i32_lshr_lshr_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: shrl $3, %eax
; X86-NEXT: shll $2, %eax
; X86-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
; X86-NEXT: retl
;
; X64-LABEL: test_i32_lshr_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: shrl $3, %eax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i32_lshr_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
; X64-MASK-NEXT: leal (,%rdi,4), %eax
; X64-MASK-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i32_lshr_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movl %edi, %eax
; X64-SHIFT-NEXT: shll $5, %eax
; X64-SHIFT-NEXT: shrl $3, %eax
; X64-SHIFT-NEXT: retq
%1 = shl i32 %a0, 5
%2 = lshr i32 %1, 3
ret i32 %2
@@ -556,17 +586,23 @@ define i64 @test_i64_lshr_lshr_1(i64 %a0) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shldl $3, %eax, %edx
; X86-NEXT: shll $3, %eax
; X86-NEXT: shrdl $5, %edx, %eax
; X86-NEXT: shrl $5, %edx
; X86-NEXT: shrdl $2, %edx, %eax
; X86-NEXT: shrl $2, %edx
; X86-NEXT: andl $134217727, %edx # imm = 0x7FFFFFF
; X86-NEXT: retl
;
; X64-LABEL: test_i64_lshr_lshr_1:
; X64: # %bb.0:
; X64-NEXT: leaq (,%rdi,8), %rax
; X64-NEXT: shrq $5, %rax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i64_lshr_lshr_1:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: shrq $2, %rdi
; X64-MASK-NEXT: movabsq $576460752303423487, %rax # imm = 0x7FFFFFFFFFFFFFF
; X64-MASK-NEXT: andq %rdi, %rax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i64_lshr_lshr_1:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: leaq (,%rdi,8), %rax
; X64-SHIFT-NEXT: shrq $5, %rax
; X64-SHIFT-NEXT: retq
%1 = shl i64 %a0, 3
%2 = lshr i64 %1, 5
ret i64 %2
@@ -577,20 +613,24 @@ define i64 @test_i64_lshr_lshr_2(i64 %a0) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shldl $5, %eax, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $5, %ecx
; X86-NEXT: shrl $27, %eax
; X86-NEXT: shldl $29, %ecx, %eax
; X86-NEXT: shrl $3, %edx
; X86-NEXT: shldl $2, %eax, %edx
; X86-NEXT: shll $2, %eax
; X86-NEXT: andl $536870911, %edx # imm = 0x1FFFFFFF
; X86-NEXT: retl
;
; X64-LABEL: test_i64_lshr_lshr_2:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: shrq $3, %rax
; X64-NEXT: retq
; X64-MASK-LABEL: test_i64_lshr_lshr_2:
; X64-MASK: # %bb.0:
; X64-MASK-NEXT: leaq (,%rdi,4), %rcx
; X64-MASK-NEXT: movabsq $2305843009213693948, %rax # imm = 0x1FFFFFFFFFFFFFFC
; X64-MASK-NEXT: andq %rcx, %rax
; X64-MASK-NEXT: retq
;
; X64-SHIFT-LABEL: test_i64_lshr_lshr_2:
; X64-SHIFT: # %bb.0:
; X64-SHIFT-NEXT: movq %rdi, %rax
; X64-SHIFT-NEXT: shlq $5, %rax
; X64-SHIFT-NEXT: shrq $3, %rax
; X64-SHIFT-NEXT: retq
%1 = shl i64 %a0, 5
%2 = lshr i64 %1, 3
ret i64 %2