[SelectionDAG][DAGCombiner] Reuse existing node by reassociation

When we already have (op N0, N2), reassociate (op (op N0, N1), N2) to (op (op N0, N2), N1) to reuse the existing (op N0, N2).

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D122539
This commit is contained in:
chenglin.bi 2022-06-24 23:14:20 +08:00
parent 186bea3750
commit 8c74205642
6 changed files with 333 additions and 232 deletions

View File

@ -1008,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
// (load/store (add, (add, x, y), offset2)) ->
// (load/store (add, (add, x, offset2), y)).
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
if (N0.hasOneUse())
return false;
auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
if (!C1 || !C2)
if (!C2)
return false;
const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
if (C2APIntVal.getSignificantBits() > 64)
return false;
const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (N0.hasOneUse())
return false;
for (SDNode *Node : N->uses()) {
auto LoadStore = dyn_cast<MemSDNode>(Node);
if (LoadStore) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
const APInt &C1APIntVal = C1->getAPIntValue();
const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getSignificantBits() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
for (SDNode *Node : N->uses()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;
// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
}
}
} else {
if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
return false;
for (SDNode *Node : N->uses()) {
auto *LoadStore = dyn_cast<MemSDNode>(Node);
if (!LoadStore)
return false;
// Is x[offset2] a legal addressing mode? If so, then
// reassociating the constants would break the address pattern.
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
@ -1042,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;
// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
return false;
}
return true;
}
return false;
@ -1099,6 +1124,28 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return N00;
}
if (TLI.isReassocProfitable(DAG, N0, N1)) {
if (N1 != N01) {
// Reassociate if (op N00, N1) already exists
if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
// If (op (op N00, N1), N01) already exists,
// we need to stop reassociating to avoid an infinite loop.
if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
}
}
if (N1 != N00) {
// Reassociate if (op N01, N1) already exists
if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
// If (op (op N01, N1), N00) already exists,
// we need to stop reassociating to avoid an infinite loop.
if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
}
}
}
return SDValue();
}

View File

@ -5,17 +5,15 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: s_mov_b32 m0, s1
; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], -1
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GCN-NEXT: ; return to shader part epilog

View File

@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "wasm32-unknown-unknown"
define i32 @reassociate_xor(float %x, float %y) {
; CHECK-LABEL: reassociate_xor:
; CHECK: .functype reassociate_xor (f32, f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: local.set 2
; CHECK-NEXT: block
; CHECK-NEXT: local.get 1
; CHECK-NEXT: f32.const 0x1p-23
; CHECK-NEXT: f32.le
; CHECK-NEXT: local.get 0
; CHECK-NEXT: f32.const 0x1p-23
; CHECK-NEXT: f32.gt
; CHECK-NEXT: i32.ne
; CHECK-NEXT: br_if 0 # 0: down to label0
; CHECK-NEXT: # %bb.1: # %if.then.i
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.set 2
; CHECK-NEXT: .LBB0_2: # %if.end.i
; CHECK-NEXT: end_block # label0:
; CHECK-NEXT: local.get 2
; CHECK-NEXT: # fallthrough-return
entry: ; preds = %if.then, %entry
%cmp0 = fcmp ule float %x, 0x3E80000000000000
%cmp1 = fcmp ugt float %y, 0x3E80000000000000
%cmp2 = xor i1 %cmp0, %cmp1
br i1 %cmp2, label %if.end.i, label %if.then.i
if.then.i: ; preds = %if.end
br label %if.end.i
if.end.i: ; preds = %if.then.i, %if.end
%s = phi i32 [ 1, %entry ], [ 0, %if.then.i ]
ret i32 %s
}

View File

@ -12,222 +12,222 @@ define fastcc i64 @foo() nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq X(%rip), %r8
; CHECK-NEXT: movq X(%rip), %r10
; CHECK-NEXT: movq X(%rip), %r9
; CHECK-NEXT: movq X(%rip), %r12
; CHECK-NEXT: movq X(%rip), %r15
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: movq X(%rip), %r12
; CHECK-NEXT: movq X(%rip), %r14
; CHECK-NEXT: movq X(%rip), %r11
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: addq %r15, %rdx
; CHECK-NEXT: movq X(%rip), %rsi
; CHECK-NEXT: bswapq %rsi
; CHECK-NEXT: leaq (%r11,%r14), %rbx
; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: leaq (%r9,%r10), %rdx
; CHECK-NEXT: addq %rdx, %rdx
; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: addq %rbx, %rdx
; CHECK-NEXT: bswapq %rdi
; CHECK-NEXT: leaq (%r15,%r14), %rsi
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %r11, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: leaq (%r10,%r8), %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r12, %rdi
; CHECK-NEXT: addq %rdi, %r9
; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: leaq (%r12,%r15), %rdi
; CHECK-NEXT: addq %r9, %rdi
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: leaq (%rdx,%r8), %rdi
; CHECK-NEXT: addq %rdi, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %rbx, %rdi
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r9,%r12), %rax
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r15, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%rdx), %r11
; CHECK-NEXT: addq %r11, %r11
; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r10, %rcx
; CHECK-NEXT: addq %rcx, %r8
; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: addq %rcx, %r11
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: leaq (%r10,%r9), %rcx
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: addq %r12, %rbx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: leaq (%rdi,%rsi), %r14
; CHECK-NEXT: addq %r14, %r14
; CHECK-NEXT: addq %r11, %r14
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %rdx
; CHECK-NEXT: addq %r11, %r14
; CHECK-NEXT: addq %rbx, %r14
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r8,%r10), %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: leaq (%r11,%rdi), %r9
; CHECK-NEXT: addq %r9, %r9
; CHECK-NEXT: addq %r14, %r9
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: addq %r14, %r9
; CHECK-NEXT: addq %rax, %r9
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: leaq (%rdx,%r8), %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %r10, %rbx
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: leaq (%r14,%r11), %r10
; CHECK-NEXT: leaq (%r11,%r14), %rsi
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: leaq (%r15,%r9), %r8
; CHECK-NEXT: leaq (%r8,%rax), %r10
; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: addq %r10, %r10
; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: addq %rbx, %rdi
; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: leaq (%r9,%r14), %r8
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: addq %rsi, %r10
; CHECK-NEXT: addq %r11, %rbx
; CHECK-NEXT: leaq (%r12,%r14), %rcx
; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: addq %r8, %r8
; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: addq %rax, %r11
; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: addq %rax, %r8
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: leaq (%rdi,%rsi), %rax
; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: leaq (%r10,%r9), %r15
; CHECK-NEXT: addq %r15, %r15
; CHECK-NEXT: addq %r8, %r15
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r11, %rbx
; CHECK-NEXT: addq %rbx, %r14
; CHECK-NEXT: addq %r8, %r15
; CHECK-NEXT: addq %rbx, %r15
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r11,%rdi), %rbx
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: leaq (%r8,%r10), %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r14, %rax
; CHECK-NEXT: addq %rax, %r9
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: leaq (%r14,%r11), %rax
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: leaq (%r15,%r8), %r12
; CHECK-NEXT: addq %r12, %r12
; CHECK-NEXT: addq %rsi, %r12
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: addq %rsi, %r12
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: addq %rbx, %r8
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r9,%r14), %rax
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r11, %rcx
; CHECK-NEXT: leaq (%rdx,%r12), %rsi
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: addq %r14, %rcx
; CHECK-NEXT: addq %rsi, %rcx
; CHECK-NEXT: leaq (%r10,%r9), %rbx
; CHECK-NEXT: addq %rbx, %rbx
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%r15), %rax
; CHECK-NEXT: addq %rax, %rax
; CHECK-NEXT: addq %r12, %rax
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %rcx, %r15
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: bswapq %rdi
; CHECK-NEXT: leaq (%rax,%rdx), %rcx
; CHECK-NEXT: addq %r15, %rcx
; CHECK-NEXT: addq %r12, %rdi
; CHECK-NEXT: addq %rcx, %rdi
; CHECK-NEXT: leaq (%r8,%r10), %r12
; CHECK-NEXT: addq %r12, %r12
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r15, %rdi
; CHECK-NEXT: addq %rdi, %r9
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: addq %rdi, %r12
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r15,%rax), %rdi
; CHECK-NEXT: addq %r9, %rdi
; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %rdi, %rcx
; CHECK-NEXT: leaq (%rbx,%r8), %r13
; CHECK-NEXT: addq %r13, %r13
; CHECK-NEXT: addq %r12, %r13
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: addq %r9, %rcx
; CHECK-NEXT: addq %rcx, %r10
; CHECK-NEXT: addq %r12, %r13
; CHECK-NEXT: addq %rcx, %r13
; CHECK-NEXT: bswapq %rdx
; CHECK-NEXT: leaq (%r9,%r15), %rcx
; CHECK-NEXT: addq %r10, %rcx
; CHECK-NEXT: addq %rcx, %r8
; CHECK-NEXT: addq %r12, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: bswapq %rbx
; CHECK-NEXT: addq %rax, %rdx
; CHECK-NEXT: addq %rcx, %rdx
; CHECK-NEXT: leaq (%r12,%rbx), %r14
; CHECK-NEXT: addq %r14, %r14
; CHECK-NEXT: addq %r13, %r14
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r10, %rdx
; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: addq %r13, %r14
; CHECK-NEXT: addq %rdx, %r14
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r10,%r9), %rcx
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: leaq (%r12,%rsi), %rcx
; CHECK-NEXT: addq %rcx, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %r15
; CHECK-NEXT: addq %r15, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: leaq (%r13,%r12), %r11
; CHECK-NEXT: addq %r11, %r11
; CHECK-NEXT: addq %r14, %r11
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: addq %r14, %r11
; CHECK-NEXT: addq %rax, %r11
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r8,%r10), %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: addq %r9, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: leaq (%r14,%r13), %r9
; CHECK-NEXT: addq %r9, %r9
; CHECK-NEXT: addq %r11, %r9
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: bswapq %rdx
; CHECK-NEXT: leaq (%r8,%r10), %rbx
; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %r9, %rdx
; CHECK-NEXT: addq %rbx, %rdx
; CHECK-NEXT: leaq (%rax,%r12), %rbx
; CHECK-NEXT: addq %rbx, %rbx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: addq %r15, %rdx
; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: bswapq %rdx
; CHECK-NEXT: addq %rcx, %r12
; CHECK-NEXT: addq %r11, %r9
; CHECK-NEXT: addq %rcx, %r9
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%rbx,%r8), %rcx
; CHECK-NEXT: addq %r12, %rcx
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: leaq (%r11,%r14), %r10
; CHECK-NEXT: addq %r10, %r10
; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: movq X(%rip), %rsi
; CHECK-NEXT: addq %r12, %rax
; CHECK-NEXT: addq %rax, %r13
; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: addq %rax, %r10
; CHECK-NEXT: bswapq %rsi
; CHECK-NEXT: leaq (%r12,%rbx), %rax
; CHECK-NEXT: addq %r13, %rax
; CHECK-NEXT: addq %r8, %rsi
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: leaq (%r9,%r11), %rdx
; CHECK-NEXT: addq %rdx, %rdx
; CHECK-NEXT: addq %r10, %rdx
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r13, %rsi
; CHECK-NEXT: addq %rsi, %r14
; CHECK-NEXT: addq %r10, %rdx
; CHECK-NEXT: leaq (%r15,%r8), %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: addq %rcx, %rcx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: addq %rdx, %r12
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r13,%r12), %rsi
; CHECK-NEXT: addq %r14, %rsi
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: leaq (%r10,%r9), %r8
; CHECK-NEXT: addq %r8, %r8
; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: movq X(%rip), %rsi
; CHECK-NEXT: addq %r14, %rax
; CHECK-NEXT: addq %rax, %r11
; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: addq %rax, %r8
; CHECK-NEXT: bswapq %rsi
; CHECK-NEXT: leaq (%r14,%r13), %rax
; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: leaq (%rdx,%r10), %rax
; CHECK-NEXT: addq %rax, %rax
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %r11, %rsi
; CHECK-NEXT: addq %rsi, %r9
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: bswapq %rdi
; CHECK-NEXT: leaq (%r11,%r14), %rsi
; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: addq %r13, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: leaq (%r8,%rdx), %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r9, %rdi
; CHECK-NEXT: addq %rdi, %r10
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r9,%r11), %rdi
; CHECK-NEXT: addq %r10, %rdi
; CHECK-NEXT: addq %r14, %rcx
; CHECK-NEXT: addq %rdi, %rcx
; CHECK-NEXT: leaq (%rax,%r8), %rdi
; CHECK-NEXT: addq %rdi, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %r10, %rcx
; CHECK-NEXT: addq %rcx, %rdx
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %rcx, %rdi
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: addq %r11, %rcx
; CHECK-NEXT: leaq (%r10,%r9), %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %rcx, %r8
; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: addq %r10, %rdx
; CHECK-NEXT: movq %rax, X(%rip)
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %r12, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq

View File

@ -162,33 +162,29 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
define i1 @ctpop_trunc_non_power2(i255 %x) nounwind {
; CHECK-LABEL: ctpop_trunc_non_power2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: movq %rcx, %r9
; CHECK-NEXT: andq %r8, %r9
; CHECK-NEXT: movq %rdi, %r11
; CHECK-NEXT: addq $-1, %r11
; CHECK-NEXT: movq %rsi, %r10
; CHECK-NEXT: adcq $-1, %r10
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: movq %rdi, %r10
; CHECK-NEXT: addq $-1, %r10
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: adcq $-1, %rax
; CHECK-NEXT: movq %rcx, %rbx
; CHECK-NEXT: adcq %r8, %rbx
; CHECK-NEXT: andq %rdi, %r11
; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: movq %rdx, %r11
; CHECK-NEXT: adcq $-1, %r11
; CHECK-NEXT: adcq %r8, %rcx
; CHECK-NEXT: andq %rdi, %r10
; CHECK-NEXT: andq %rdx, %r11
; CHECK-NEXT: orq %r10, %r11
; CHECK-NEXT: andq %r9, %rcx
; CHECK-NEXT: andq %rsi, %rax
; CHECK-NEXT: orq %rcx, %rax
; CHECK-NEXT: orq %r11, %rax
; CHECK-NEXT: andq %rsi, %r10
; CHECK-NEXT: andq %r8, %rbx
; CHECK-NEXT: andq %rcx, %rbx
; CHECK-NEXT: orq %r10, %rbx
; CHECK-NEXT: orq %rax, %rbx
; CHECK-NEXT: sete %cl
; CHECK-NEXT: orq %rdx, %rdi
; CHECK-NEXT: orq %rsi, %r9
; CHECK-NEXT: orq %rdi, %r9
; CHECK-NEXT: setne %al
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%a = call i255 @llvm.ctpop.i255(i255 %x)
%b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits.

View File

@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-generic < %s | FileCheck %s
define void @reassociation_gt64bit(i32 %x, i32 %y, ptr %s) {
; CHECK-LABEL: reassociation_gt64bit:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: movq %rcx, (%rdx)
; CHECK-NEXT: movw $64, 8(%rdx)
; CHECK-NEXT: retq
%zextx = zext i32 %x to i80
%zexty = zext i32 %y to i80
%add1 = add i80 %zextx, 1180591620717411303424
%add2 = add i80 %add1, %zexty
store i80 %add2, ptr %s
ret void
}