forked from OSchip/llvm-project
[X86] Preserve memory refs when folding loads into divides.
This is similar to what we already do for multiplies. Without the memory reference, we can't unfold and hoist an invariant load. llvm-svn: 317732
This commit is contained in:
parent
55029d811f
commit
61f81f9637
|
@ -2885,11 +2885,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
||||||
if (foldedLoad) {
|
if (foldedLoad) {
|
||||||
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
||||||
InFlag };
|
InFlag };
|
||||||
SDNode *CNode =
|
MachineSDNode *CNode =
|
||||||
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
|
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
|
||||||
InFlag = SDValue(CNode, 1);
|
InFlag = SDValue(CNode, 1);
|
||||||
// Update the chain.
|
// Update the chain.
|
||||||
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
||||||
|
// Record the mem-refs
|
||||||
|
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
|
||||||
|
MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
|
||||||
|
CNode->setMemRefs(MemOp, MemOp + 1);
|
||||||
} else {
|
} else {
|
||||||
InFlag =
|
InFlag =
|
||||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
|
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
; REQUIRES: asserts
|
; REQUIRES: asserts
|
||||||
; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "4 machinelicm.*hoisted"
|
; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "7 machinelicm.*hoisted"
|
||||||
; For test:
|
; For test:
|
||||||
; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_
|
; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_
|
||||||
; and 1 for objc_msgSend from the GOT
|
; and 1 for objc_msgSend from the GOT
|
||||||
; For test_multi_def:
|
; For test_multi_def:
|
||||||
; 2 invariant loads (full multiply, both loads should be hoisted.)
|
; 2 invariant loads (full multiply, both loads should be hoisted.)
|
||||||
|
; For test_div_def:
|
||||||
|
; 2 invariant loads (full divide, both loads should be hoisted.) Plus 1 additional instruction that zeroes edx, which gets hoisted and then rematerialized.
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-apple-macosx10.7.2"
|
target triple = "x86_64-apple-macosx10.7.2"
|
||||||
|
@ -60,4 +62,30 @@ exit:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @test_div_def(i32* dereferenceable(8) %x1,
|
||||||
|
i32* dereferenceable(8) %x2,
|
||||||
|
i32* %y, i32 %count) nounwind {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.check:
|
||||||
|
%inc = add nsw i32 %i, 1
|
||||||
|
%done = icmp sge i32 %inc, %count
|
||||||
|
br i1 %done, label %exit, label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%i = phi i32 [ 0, %entry ], [ %inc, %for.check ]
|
||||||
|
%x1_load = load i32, i32* %x1, align 8, !invariant.load !0
|
||||||
|
%x2_load = load i32, i32* %x2, align 8, !invariant.load !0
|
||||||
|
%x_quot = udiv i32 %x1_load, %x2_load
|
||||||
|
%y_elem = getelementptr inbounds i32, i32* %y, i32 %i
|
||||||
|
%y_load = load i32, i32* %y_elem, align 8
|
||||||
|
%y_plus = add i32 %x_quot, %y_load
|
||||||
|
store i32 %y_plus, i32* %y_elem, align 8
|
||||||
|
br label %for.check
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
!0 = !{}
|
!0 = !{}
|
||||||
|
|
Loading…
Reference in New Issue