[DAG] Fold (srl (shl x, c1), c2) -> and(shl/srl(x, c3), m)
Similar to the existing (shl (srl x, c1), c2) fold.

Part of the work to fix the regressions in D77804.

Differential Revision: https://reviews.llvm.org/D125836
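For illustration, the new fold rewrites a shl+srl pair with constant amounts into a single shift plus a constant mask. Below is a minimal standalone C++ sketch of the underlying identity (the helper name srl_shl_fold is invented for this example; the patch itself operates on SelectionDAG nodes):

#include <cassert>
#include <cstdint>

// (x << c1) >> c2 on a 32-bit value:
//   c1 >= c2: shift left by (c1 - c2), then mask with (~0u >> c1) << (c1 - c2)
//   c2 >  c1: shift right by (c2 - c1), then mask with ~0u >> c2
uint32_t srl_shl_fold(uint32_t x, unsigned c1, unsigned c2) {
  if (c1 >= c2)
    return (x << (c1 - c2)) & ((~0u >> c1) << (c1 - c2));
  return (x >> (c2 - c1)) & (~0u >> c2);
}

int main() {
  // Exhaustively check the identity for all in-range shift amounts.
  for (uint32_t x : {0u, 1u, 0xDEADBEEFu, ~0u})
    for (unsigned c1 = 0; c1 < 32; ++c1)
      for (unsigned c2 = 0; c2 < 32; ++c2)
        assert(srl_shl_fold(x, c1, c2) == ((x << c1) >> c2));
}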
@@ -9419,15 +9419,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     }
   }

-  // fold (srl (shl x, c), c) -> (and x, cst2)
-  // TODO - (srl (shl x, c1), c2).
-  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
-    SDLoc DL(N);
-    SDValue Mask =
-        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
-    AddToWorklist(Mask.getNode());
-    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
+  //                               (and (srl x, (sub c2, c1), MASK)
+  if (N0.getOpcode() == ISD::SHL &&
+      (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+    auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+                                           ConstantSDNode *RHS) {
+      const APInt &LHSC = LHS->getAPIntValue();
+      const APInt &RHSC = RHS->getAPIntValue();
+      return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+             LHSC.getZExtValue() <= RHSC.getZExtValue();
+    };
+    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+                                  /*AllowUndefs*/ false,
+                                  /*AllowTypeMismatch*/ true)) {
+      SDLoc DL(N);
+      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+      Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+    }
+    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+                                  /*AllowUndefs*/ false,
+                                  /*AllowTypeMismatch*/ true)) {
+      SDLoc DL(N);
+      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+      SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+    }
   }

   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
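As a concrete instance of the combine above, take the i8 shift amounts exercised by the X86 tests later in this patch: c1 = 5, c2 = 3 gives Diff = 2 and Mask = (0xFF >> 5) << 2 = 0x1C, so (srl (shl x, 5), 3) becomes (and (shl x, 2), 0x1C). A plain C++ restatement (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v != 256; ++v) {
    uint8_t x = static_cast<uint8_t>(v);
    // Original pair of shifts, with the i8 truncation made explicit.
    uint8_t shifts = static_cast<uint8_t>(static_cast<uint8_t>(x << 5) >> 3);
    // Folded form: a single shl by Diff plus the computed mask.
    uint8_t masked = static_cast<uint8_t>((x << 2) & 0x1C);
    assert(shifts == masked);
  }
}

This is the shape the new X64-MASK and X86 check lines below expect: a shift by two followed by andb $28 (0x1C).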
@@ -13411,7 +13411,18 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
           N->getOperand(0).getOpcode() == ISD::SHL)) &&
          "Expected shift-shift mask");
   // Don't allow multiuse shift folding with the same shift amount.
-  return N->getOperand(0)->hasOneUse();
+  if (!N->getOperand(0)->hasOneUse())
+    return false;
+
+  // Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
+  EVT VT = N->getValueType(0);
+  if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
+    auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
+  }
+
+  return true;
 }

 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
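The UBFX guard above exists because, when C2 >= C1, srl(shl(x,C1),C2) is exactly an unsigned bitfield extract, a single A64 instruction, and rewriting it into shift+mask would lose that. A small C++ sketch of the correspondence (ubfx here is a hypothetical helper mirroring the instruction's semantics, not backend code):

#include <cassert>
#include <cstdint>

// ubfx(x, lsb, width): extract 'width' bits of x starting at bit 'lsb'.
uint32_t ubfx(uint32_t x, unsigned lsb, unsigned width) {
  return (x >> lsb) & ((1u << width) - 1u);
}

int main() {
  // C1 = 3 < C2 = 5 on i32: ((x << 3) >> 5) == ubfx(x, C2 - C1, 32 - C2).
  for (uint32_t x : {0u, 0x12345678u, ~0u})
    assert(((x << 3) >> 5) == ubfx(x, 2, 27));
}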
@@ -5844,6 +5844,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
           (N->getOpcode() == ISD::SRL &&
            N->getOperand(0).getOpcode() == ISD::SHL)) &&
          "Expected shift-shift mask");
+  // TODO: Should we always create i64 masks? Or only folded immediates?
   EVT VT = N->getValueType(0);
   if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
       (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
@@ -129,11 +129,10 @@ define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
 ; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w8, w0, #0xfffc
-; CHECK-NEXT: lsl w9, w8, #14
-; CHECK-NEXT: lsl w8, w8, #17
-; CHECK-NEXT: and w10, w9, #0x1fff0000
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csinv w8, w8, wzr, eq
+; CHECK-NEXT: lsl w9, w8, #17
+; CHECK-NEXT: lsl w8, w8, #14
+; CHECK-NEXT: cmp w8, w9, lsr #3
+; CHECK-NEXT: csinv w8, w9, wzr, eq
 ; CHECK-NEXT: lsr w0, w8, #16
 ; CHECK-NEXT: ret
   %x2 = lshr i16 %x, 2
@@ -2852,7 +2852,7 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX7-NEXT: v_bfe_i32 v8, v2, 8, 4
 ; GFX7-NEXT: v_bfe_i32 v9, v2, 4, 4
 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 4
-; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v3
+; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
 ; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v4
 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5
 ; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v6
@@ -2861,67 +2861,67 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_ashrrev_i32_e32 v11, 28, v0
; GFX7-NEXT: v_bfe_i32 v12, v0, 24, 4
; GFX7-NEXT: v_bfe_i32 v13, v0, 20, 4
; GFX7-NEXT: v_bfe_i32 v14, v0, 16, 4
; GFX7-NEXT: v_bfe_i32 v15, v0, 12, 4
; GFX7-NEXT: v_bfe_i32 v16, v0, 8, 4
; GFX7-NEXT: v_bfe_i32 v17, v0, 4, 4
; GFX7-NEXT: v_ashrrev_i32_e32 v10, 28, v0
; GFX7-NEXT: v_bfe_i32 v11, v0, 24, 4
; GFX7-NEXT: v_bfe_i32 v12, v0, 20, 4
; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4
; GFX7-NEXT: v_bfe_i32 v14, v0, 12, 4
; GFX7-NEXT: v_bfe_i32 v15, v0, 8, 4
; GFX7-NEXT: v_bfe_i32 v16, v0, 4, 4
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 4
; GFX7-NEXT: v_or_b32_e32 v4, v4, v10
; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
; GFX7-NEXT: v_or_b32_e32 v6, v8, v7
; GFX7-NEXT: v_or_b32_e32 v3, v4, v3
; GFX7-NEXT: v_or_b32_e32 v4, v6, v5
; GFX7-NEXT: v_or_b32_e32 v5, v8, v7
; GFX7-NEXT: v_or_b32_e32 v2, v2, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v11
; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v13
; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v15
; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v16
; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v17
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v10
; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v11
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v12
; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v14
; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v15
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v16
; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-NEXT: v_or_b32_e32 v7, v8, v7
; GFX7-NEXT: v_or_b32_e32 v8, v10, v9
; GFX7-NEXT: v_or_b32_e32 v9, v13, v12
; GFX7-NEXT: v_or_b32_e32 v0, v0, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_or_b32_e32 v6, v7, v6
; GFX7-NEXT: v_or_b32_e32 v7, v9, v8
; GFX7-NEXT: v_or_b32_e32 v8, v11, v10
; GFX7-NEXT: v_or_b32_e32 v0, v0, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_or_b32_e32 v4, v5, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v6
; GFX7-NEXT: v_or_b32_e32 v0, v0, v7
; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
; GFX7-NEXT: v_or_b32_e32 v4, v4, v13
; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v2
; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v0
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX7-NEXT: v_bfe_u32 v8, v2, 8, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-NEXT: v_bfe_u32 v9, v2, 8, 8
; GFX7-NEXT: v_bfe_u32 v14, v0, 8, 8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v7, v13, v1
; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v13, v1
; GFX7-NEXT: v_or_b32_e32 v5, v7, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v14, v1
; GFX7-NEXT: v_mad_u32_u24 v1, v9, v14, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v4
; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v4
; GFX7-NEXT: v_and_b32_e32 v15, 0xff, v5
; GFX7-NEXT: v_mad_u32_u24 v0, v6, v12, v0
; GFX7-NEXT: v_bfe_u32 v10, v4, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v7, v12, v0
; GFX7-NEXT: v_bfe_u32 v11, v4, 8, 8
; GFX7-NEXT: v_bfe_u32 v16, v5, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v9, v15, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v10, v15, v0
; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8
; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v10, v16, v0
; GFX7-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-NEXT: v_mad_u32_u24 v0, v11, v16, v0
; GFX7-NEXT: v_bfe_u32 v3, v3, 8, 8
; GFX7-NEXT: v_bfe_u32 v6, v6, 8, 8
; GFX7-NEXT: v_mad_u32_u24 v0, v4, v5, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v11, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v6, v0
; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
@@ -683,8 +683,8 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
 ; SI-NEXT: s_mov_b32 s4, s0
 ; SI-NEXT: s_mov_b32 s5, s1
 ; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
-; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; SI-NEXT: v_lshlrev_b32_e32 v0, 30, v0
+; SI-NEXT: v_and_b32_e32 v0, 2.0, v0
 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
 ;
@@ -702,8 +702,8 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: s_mov_b32 s4, s0
 ; VI-NEXT: s_mov_b32 s5, s1
 ; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
-; VI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; VI-NEXT: v_lshlrev_b32_e32 v0, 30, v0
+; VI-NEXT: v_and_b32_e32 v0, 2.0, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
   %x = load i32, i32 addrspace(1)* %in, align 4
@@ -31,23 +31,18 @@ define i32 @test2(i32* %m_degree) ssp {
 ; CHECK-LABEL: test2:
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: movs r1, #7
-; CHECK-NEXT: lsls r1, r1, #29
-; CHECK-NEXT: ldr r0, [r0]
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: bics r2, r1
-; CHECK-NEXT: subs r1, r0, r2
+; CHECK-NEXT: ldr r1, [r0]
+; CHECK-NEXT: lsls r0, r1, #3
+; CHECK-NEXT: lsrs r2, r0, #3
+; CHECK-NEXT: subs r1, r1, r2
 ; CHECK-NEXT: subs r2, r1, #1
 ; CHECK-NEXT: sbcs r1, r2
 ; CHECK-NEXT: movs r4, #0
 ; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: bne .LBB1_2
+; CHECK-NEXT: beq .LBB1_2
 ; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: lsls r0, r0, #3
-; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_2:
 ; CHECK-NEXT: mvns r0, r4
-; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: .LBB1_2:
 ; CHECK-NEXT: bl _Znam
 ; CHECK-NEXT: mov r0, r4
 ; CHECK-NEXT: pop {r4, pc}
@@ -9,9 +9,8 @@ define void @fn1() {
 ; CHECK-LABEL: fn1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpl $1, c(%rip)
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: cmpl $0, c(%rip)
+; CHECK-NEXT: sete %al
 ; CHECK-NEXT: movl %eax, d(%rip)
 ; CHECK-NEXT: retq
   %t0 = load i32, i32* @c, align 4
@@ -195,10 +195,9 @@ define i32 @and_signbit_lshr(i32 %x, i32* %dst) {
 ;
 ; X86-LABEL: and_signbit_lshr:
 ; X86: # %bb.0:
-; X86-NEXT: movzwl 6(%esp), %eax
-; X86-NEXT: shll $16, %eax
 ; X86-NEXT: movl 8(%esp), %ecx
-; X86-NEXT: shrl $8, %eax
+; X86-NEXT: movzwl 6(%esp), %eax
+; X86-NEXT: shll $8, %eax
 ; X86-NEXT: movl %eax, (%ecx)
 ; X86-NEXT: retl
   %t0 = and i32 %x, 4294901760 ; 0xFFFF0000
@@ -147,13 +147,19 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind {

 ; Result would undershift
 define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
-; CHECK-LABEL: no_extract_shl:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpsllq $11, %ymm0, %ymm1
-; CHECK-NEXT: vpsllq $24, %ymm0, %ymm0
-; CHECK-NEXT: vpsrlq $50, %ymm1, %ymm1
-; CHECK-NEXT: vpor %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: ret{{[l|q]}}
+; X86-LABEL: no_extract_shl:
+; X86: # %bb.0:
+; X86-NEXT: vpsllq $24, %ymm0, %ymm1
+; X86-NEXT: vpsrlq $39, %ymm0, %ymm0
+; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: no_extract_shl:
+; X64: # %bb.0:
+; X64-NEXT: vpsllq $24, %ymm0, %ymm1
+; X64-NEXT: vpsrlq $39, %ymm0, %ymm0
+; X64-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
+; X64-NEXT: retq
   %lhs_mul = shl <4 x i64> %i, <i64 11, i64 11, i64 11, i64 11>
   %rhs_mul = shl <4 x i64> %i, <i64 24, i64 24, i64 24, i64 24>
   %lhs_shift = lshr <4 x i64> %lhs_mul, <i64 50, i64 50, i64 50, i64 50>
@@ -135,21 +135,21 @@ define i64 @no_extract_shl(i64 %i) nounwind {
 ; X86-LABEL: no_extract_shl:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: shll $5, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %edx
 ; X86-NEXT: shldl $10, %ecx, %edx
 ; X86-NEXT: shll $10, %ecx
-; X86-NEXT: shrl $25, %eax
+; X86-NEXT: shrl $20, %eax
 ; X86-NEXT: andl $127, %eax
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: no_extract_shl:
 ; X64: # %bb.0:
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: shlq $10, %rdi
-; X64-NEXT: shrq $57, %rax
+; X64-NEXT: shlq $10, %rax
+; X64-NEXT: shrq $52, %rdi
+; X64-NEXT: andl $127, %edi
 ; X64-NEXT: orq %rdi, %rax
 ; X64-NEXT: retq
   %lhs_mul = shl i64 %i, 5
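The no_extract_shl hunks above also guard the rotate matcher: the "Result would undershift" comment means the srl amount is too small for the or of the two shifts to form a rotate, and that must stay true once the shift pair is folded to shift+mask. A plain C++ restatement of the scalar i64 case (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // no_extract_shl: ((i << 5) >> 57) | (i << 10). A rotate left by 10 would
  // need (i >> 54); the folded form is ((i >> 52) & 127), two bits short of
  // a rotate, so no rol may be formed from this pattern.
  for (uint64_t i : {0ull, 1ull, 0x0123456789ABCDEFull, ~0ull}) {
    uint64_t pattern = ((i << 5) >> 57) | (i << 10);
    assert(pattern == (((i >> 52) & 127) | (i << 10)));
  }
}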
@@ -337,17 +337,25 @@ define i8 @test_i8_lshr_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_lshr_lshr_1:
 ; X86: # %bb.0:
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: shlb $3, %al
-; X86-NEXT: shrb $5, %al
+; X86-NEXT: shrb $2, %al
+; X86-NEXT: andb $7, %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i8_lshr_lshr_1:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: shrb $5, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i8_lshr_lshr_1:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: movl %edi, %eax
+; X64-MASK-NEXT: shrb $2, %al
+; X64-MASK-NEXT: andb $7, %al
+; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i8_lshr_lshr_1:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT: shrb $5, %al
+; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT: retq
   %1 = shl i8 %a0, 3
   %2 = lshr i8 %1, 5
   ret i8 %2
@@ -357,17 +365,25 @@ define i8 @test_i8_lshr_lshr_2(i8 %a0) {
 ; X86-LABEL: test_i8_lshr_lshr_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: shlb $5, %al
-; X86-NEXT: shrb $3, %al
+; X86-NEXT: shlb $2, %al
+; X86-NEXT: andb $28, %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i8_lshr_lshr_2:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $5, %al
-; X64-NEXT: shrb $3, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i8_lshr_lshr_2:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: leal (,%rdi,4), %eax
+; X64-MASK-NEXT: andb $28, %al
+; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i8_lshr_lshr_2:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: movl %edi, %eax
+; X64-SHIFT-NEXT: shlb $5, %al
+; X64-SHIFT-NEXT: shrb $3, %al
+; X64-SHIFT-NEXT: # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT: retq
   %1 = shl i8 %a0, 5
   %2 = lshr i8 %1, 3
   ret i8 %2
@@ -476,16 +492,23 @@ define i32 @test_i32_lshr_lshr_1(i32 %a0) {
 ; X86-LABEL: test_i32_lshr_lshr_1:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shll $3, %eax
-; X86-NEXT: shrl $5, %eax
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i32_lshr_lshr_1:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: shrl $5, %eax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i32_lshr_lshr_1:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: movl %edi, %eax
+; X64-MASK-NEXT: shrl $2, %eax
+; X64-MASK-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i32_lshr_lshr_1:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT: leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT: shrl $5, %eax
+; X64-SHIFT-NEXT: retq
   %1 = shl i32 %a0, 3
   %2 = lshr i32 %1, 5
   ret i32 %2
@@ -495,16 +518,23 @@ define i32 @test_i32_lshr_lshr_2(i32 %a0) {
 ; X86-LABEL: test_i32_lshr_lshr_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shll $5, %eax
-; X86-NEXT: shrl $3, %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i32_lshr_lshr_2:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $5, %eax
-; X64-NEXT: shrl $3, %eax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i32_lshr_lshr_2:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: leal (,%rdi,4), %eax
+; X64-MASK-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i32_lshr_lshr_2:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: movl %edi, %eax
+; X64-SHIFT-NEXT: shll $5, %eax
+; X64-SHIFT-NEXT: shrl $3, %eax
+; X64-SHIFT-NEXT: retq
   %1 = shl i32 %a0, 5
   %2 = lshr i32 %1, 3
   ret i32 %2
@@ -556,17 +586,23 @@ define i64 @test_i64_lshr_lshr_1(i64 %a0) {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shldl $3, %eax, %edx
-; X86-NEXT: shll $3, %eax
-; X86-NEXT: shrdl $5, %edx, %eax
-; X86-NEXT: shrl $5, %edx
+; X86-NEXT: shrdl $2, %edx, %eax
+; X86-NEXT: shrl $2, %edx
+; X86-NEXT: andl $134217727, %edx # imm = 0x7FFFFFF
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i64_lshr_lshr_1:
-; X64: # %bb.0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: shrq $5, %rax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i64_lshr_lshr_1:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: shrq $2, %rdi
+; X64-MASK-NEXT: movabsq $576460752303423487, %rax # imm = 0x7FFFFFFFFFFFFFF
+; X64-MASK-NEXT: andq %rdi, %rax
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i64_lshr_lshr_1:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: leaq (,%rdi,8), %rax
+; X64-SHIFT-NEXT: shrq $5, %rax
+; X64-SHIFT-NEXT: retq
   %1 = shl i64 %a0, 3
   %2 = lshr i64 %1, 5
   ret i64 %2
@@ -577,20 +613,24 @@ define i64 @test_i64_lshr_lshr_2(i64 %a0) {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shldl $5, %eax, %edx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $5, %ecx
-; X86-NEXT: shrl $27, %eax
-; X86-NEXT: shldl $29, %ecx, %eax
-; X86-NEXT: shrl $3, %edx
+; X86-NEXT: shldl $2, %eax, %edx
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: andl $536870911, %edx # imm = 0x1FFFFFFF
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i64_lshr_lshr_2:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: shrq $3, %rax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i64_lshr_lshr_2:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: leaq (,%rdi,4), %rcx
+; X64-MASK-NEXT: movabsq $2305843009213693948, %rax # imm = 0x1FFFFFFFFFFFFFFC
+; X64-MASK-NEXT: andq %rcx, %rax
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i64_lshr_lshr_2:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: movq %rdi, %rax
+; X64-SHIFT-NEXT: shlq $5, %rax
+; X64-SHIFT-NEXT: shrq $3, %rax
+; X64-SHIFT-NEXT: retq
   %1 = shl i64 %a0, 5
   %2 = lshr i64 %1, 3
   ret i64 %2