forked from OSchip/llvm-project
[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias
Summary: To support all targets, the mayAlias member function needs to support instructions with multiple memory operands. This revision also changes the order of the emitted instructions in some test cases. Reviewers: efriedma, hfinkel, craig.topper, dmgreen Reviewed By: efriedma Subscribers: MatzeB, dmgreen, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80161
This commit is contained in:
parent
689e616ed0
commit
7019cea26d
|
@ -1228,81 +1228,88 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
|
|||
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
|
||||
return false;
|
||||
|
||||
// FIXME: Need to handle multiple memory operands to support all targets.
|
||||
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
|
||||
if (memoperands_empty() || Other.memoperands_empty())
|
||||
return true;
|
||||
|
||||
MachineMemOperand *MMOa = *memoperands_begin();
|
||||
MachineMemOperand *MMOb = *Other.memoperands_begin();
|
||||
auto HasAlias = [&](const MachineMemOperand &MMOa,
|
||||
const MachineMemOperand &MMOb) {
|
||||
// The following interface to AA is fashioned after DAGCombiner::isAlias
|
||||
// and operates with MachineMemOperand offset with some important
|
||||
// assumptions:
|
||||
// - LLVM fundamentally assumes flat address spaces.
|
||||
// - MachineOperand offset can *only* result from legalization and
|
||||
// cannot affect queries other than the trivial case of overlap
|
||||
// checking.
|
||||
// - These offsets never wrap and never step outside
|
||||
// of allocated objects.
|
||||
// - There should never be any negative offsets here.
|
||||
//
|
||||
// FIXME: Modify API to hide this math from "user"
|
||||
// Even before we go to AA we can reason locally about some
|
||||
// memory objects. It can save compile time, and possibly catch some
|
||||
// corner cases not currently covered.
|
||||
|
||||
// The following interface to AA is fashioned after DAGCombiner::isAlias
|
||||
// and operates with MachineMemOperand offset with some important
|
||||
// assumptions:
|
||||
// - LLVM fundamentally assumes flat address spaces.
|
||||
// - MachineOperand offset can *only* result from legalization and
|
||||
// cannot affect queries other than the trivial case of overlap
|
||||
// checking.
|
||||
// - These offsets never wrap and never step outside
|
||||
// of allocated objects.
|
||||
// - There should never be any negative offsets here.
|
||||
//
|
||||
// FIXME: Modify API to hide this math from "user"
|
||||
// Even before we go to AA we can reason locally about some
|
||||
// memory objects. It can save compile time, and possibly catch some
|
||||
// corner cases not currently covered.
|
||||
int64_t OffsetA = MMOa.getOffset();
|
||||
int64_t OffsetB = MMOb.getOffset();
|
||||
int64_t MinOffset = std::min(OffsetA, OffsetB);
|
||||
|
||||
int64_t OffsetA = MMOa->getOffset();
|
||||
int64_t OffsetB = MMOb->getOffset();
|
||||
int64_t MinOffset = std::min(OffsetA, OffsetB);
|
||||
uint64_t WidthA = MMOa.getSize();
|
||||
uint64_t WidthB = MMOb.getSize();
|
||||
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
|
||||
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
|
||||
|
||||
uint64_t WidthA = MMOa->getSize();
|
||||
uint64_t WidthB = MMOb->getSize();
|
||||
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
|
||||
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
|
||||
const Value *ValA = MMOa.getValue();
|
||||
const Value *ValB = MMOb.getValue();
|
||||
bool SameVal = (ValA && ValB && (ValA == ValB));
|
||||
if (!SameVal) {
|
||||
const PseudoSourceValue *PSVa = MMOa.getPseudoValue();
|
||||
const PseudoSourceValue *PSVb = MMOb.getPseudoValue();
|
||||
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVa && PSVb && (PSVa == PSVb))
|
||||
SameVal = true;
|
||||
}
|
||||
|
||||
const Value *ValA = MMOa->getValue();
|
||||
const Value *ValB = MMOb->getValue();
|
||||
bool SameVal = (ValA && ValB && (ValA == ValB));
|
||||
if (!SameVal) {
|
||||
const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
|
||||
const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
|
||||
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVa && PSVb && (PSVa == PSVb))
|
||||
SameVal = true;
|
||||
}
|
||||
if (SameVal) {
|
||||
if (!KnownWidthA || !KnownWidthB)
|
||||
return true;
|
||||
int64_t MaxOffset = std::max(OffsetA, OffsetB);
|
||||
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
|
||||
return (MinOffset + LowWidth > MaxOffset);
|
||||
}
|
||||
|
||||
if (SameVal) {
|
||||
if (!KnownWidthA || !KnownWidthB)
|
||||
if (!AA)
|
||||
return true;
|
||||
int64_t MaxOffset = std::max(OffsetA, OffsetB);
|
||||
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
|
||||
return (MinOffset + LowWidth > MaxOffset);
|
||||
|
||||
if (!ValA || !ValB)
|
||||
return true;
|
||||
|
||||
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
|
||||
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
|
||||
|
||||
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
|
||||
: MemoryLocation::UnknownSize;
|
||||
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
|
||||
: MemoryLocation::UnknownSize;
|
||||
|
||||
AliasResult AAResult =
|
||||
AA->alias(MemoryLocation(ValA, OverlapA,
|
||||
UseTBAA ? MMOa.getAAInfo() : AAMDNodes()),
|
||||
MemoryLocation(ValB, OverlapB,
|
||||
UseTBAA ? MMOb.getAAInfo() : AAMDNodes()));
|
||||
|
||||
return (AAResult != NoAlias);
|
||||
};
|
||||
|
||||
for (auto &&MMOa : memoperands()) {
|
||||
for (auto &&MMOb : Other.memoperands()) {
|
||||
if (HasAlias(*MMOa, *MMOb))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!AA)
|
||||
return true;
|
||||
|
||||
if (!ValA || !ValB)
|
||||
return true;
|
||||
|
||||
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
|
||||
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
|
||||
|
||||
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
|
||||
: MemoryLocation::UnknownSize;
|
||||
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
|
||||
: MemoryLocation::UnknownSize;
|
||||
|
||||
AliasResult AAResult = AA->alias(
|
||||
MemoryLocation(ValA, OverlapA,
|
||||
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
|
||||
MemoryLocation(ValB, OverlapB,
|
||||
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
|
||||
|
||||
return (AAResult != NoAlias);
|
||||
return false;
|
||||
}
|
||||
|
||||
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
|
||||
|
|
|
@ -544,9 +544,14 @@ static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
|
|||
/// Conditionally record a memory-ordering edge between two scheduling units.
///
/// Asks SUa's instruction whether it may alias SUb's instruction (passing
/// AAForDep and UseTBAA). If it may, an SDep of kind SDep::MayAliasMem on SUa
/// is built, given \p Latency, and added as a predecessor of SUb. Either
/// outcome is reported through LLVM_DEBUG, printing SUb's instruction as
/// "from" and SUa's instruction as "to".
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
|
||||
unsigned Latency) {
|
||||
if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
|
||||
LLVM_DEBUG(dbgs() << "Adding chain dependency\n from: " << *SUb->getInstr()
|
||||
<< " to: " << *SUa->getInstr());
|
||||
SDep Dep(SUa, SDep::MayAliasMem);
|
||||
Dep.setLatency(Latency);
|
||||
SUb->addPred(Dep);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "Not adding chain dependency\n from: "
|
||||
<< *SUb->getInstr() << " to: " << *SUa->getInstr());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
|
|||
; A53-NEXT: mov x19, x8
|
||||
; A53-NEXT: mov w0, w1
|
||||
; A53-NEXT: mov w9, #256
|
||||
; A53-NEXT: stp x2, x3, [x8, #32]
|
||||
; A53-NEXT: mov x2, x8
|
||||
; A53-NEXT: str q0, [x19, #16]!
|
||||
; A53-NEXT: str w1, [x19]
|
||||
; A53-NEXT: mov w1, #4
|
||||
; A53-NEXT: stp x2, x3, [x8, #32]
|
||||
; A53-NEXT: mov x2, x8
|
||||
; A53-NEXT: str q0, [x8]
|
||||
; A53-NEXT: strh w9, [x8, #24]
|
||||
; A53-NEXT: str wzr, [x8, #20]
|
||||
|
|
|
@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
|
|||
; CHECK-NEXT: vmov.32 r3, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r1, d16[0]
|
||||
; CHECK-NEXT: subs r12, r12, #1
|
||||
; CHECK-NEXT: str r12, [r0, #12]
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: str r2, [r0, #8]
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: sbc r1, r1, #0
|
||||
; CHECK-NEXT: stm r0, {r1, r3}
|
||||
; CHECK-NEXT: str r2, [r0, #8]
|
||||
; CHECK-NEXT: str r12, [r0, #12]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; CHECK: ********** MI Scheduling **********
|
||||
; We need second, post-ra scheduling to have VLDM instruction combined from single-loads
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK: VLDMDIA_UPD
|
||||
; CHECK: SU(1):{{.*}}VLDMDIA_UPD
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 6
|
||||
; CHECK: Successors:
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK: schedule starting
|
||||
; CHECK: VSTMDIA_UPD
|
||||
; CHECK: SU(2):{{.*}}VSTMDIA_UPD
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 4
|
||||
; CHECK: Successors:
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK: schedule starting
|
||||
; CHECK: VSTMDIA
|
||||
; CHECK: SU(3):{{.*}}VSTMDIA
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 2
|
||||
|
||||
|
|
|
@ -1092,6 +1092,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: ldrd lr, r10, [r12, #24]
|
||||
; CHECK-NEXT: vstrb.8 q0, [r11], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r8], #32
|
||||
; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r8, #-28]
|
||||
; CHECK-NEXT: vmul.f32 q0, q0, r0
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r8, #-24]
|
||||
|
@ -1103,13 +1104,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: vfma.f32 q0, q4, r6
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r8, #-8]
|
||||
; CHECK-NEXT: vfma.f32 q0, q5, r5
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r8, #-4]
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, r3
|
||||
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, r3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r8, #-4]
|
||||
; CHECK-NEXT: vfma.f32 q0, q3, lr
|
||||
; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r10
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r10
|
||||
; CHECK-NEXT: blo .LBB16_7
|
||||
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
|
|
|
@ -168,16 +168,14 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov q1, q4
|
||||
; CHECK-NEXT: vmov s1, r7
|
||||
; CHECK-NEXT: vmov.32 q1[1], r6
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vmov.32 q5[0], r7
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vmov s9, r4
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: strd r0, r10, [sp, #24]
|
||||
; CHECK-NEXT: vdup.32 q6, r7
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
|
||||
; CHECK-NEXT: vmov q1, q5
|
||||
; CHECK-NEXT: vmov s9, r4
|
||||
; CHECK-NEXT: vmov.32 q1[1], r7
|
||||
; CHECK-NEXT: vdup.32 q6, r7
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov.f32 s8, s0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r6
|
||||
|
@ -185,6 +183,7 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov q7, q6
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: mov.w r8, #4
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmov.32 q3[0], r4
|
||||
; CHECK-NEXT: vmov.32 q7[1], r4
|
||||
|
@ -192,6 +191,7 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: movs r1, #64
|
||||
; CHECK-NEXT: strh.w r8, [sp, #390]
|
||||
; CHECK-NEXT: strd r0, r10, [sp, #24]
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
|
||||
; CHECK-NEXT: str r0, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0]
|
||||
|
|
|
@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s9, s6
|
||||
; CHECK-NEXT: vmov.f32 s10, s0
|
||||
; CHECK-NEXT: vmov.f32 s11, s5
|
||||
; CHECK-NEXT: strd r2, r0, [r1, #16]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r1]
|
||||
; CHECK-NEXT: strd r2, r0, [r1, #16]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0
|
||||
|
|
|
@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
|
|||
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; THUMBV7-NEXT: .pad #44
|
||||
; THUMBV7-NEXT: sub sp, #44
|
||||
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
|
||||
; THUMBV7-NEXT: mov r5, r3
|
||||
; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
|
||||
; THUMBV7-NEXT: movs r0, #0
|
||||
; THUMBV7-NEXT: strd r4, r7, [sp]
|
||||
; THUMBV7-NEXT: mov r1, r3
|
||||
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
|
||||
; THUMBV7-NEXT: mov r5, r3
|
||||
; THUMBV7-NEXT: strd r0, r0, [sp, #8]
|
||||
; THUMBV7-NEXT: mov r1, r3
|
||||
; THUMBV7-NEXT: mov r6, r2
|
||||
; THUMBV7-NEXT: mov r0, r2
|
||||
; THUMBV7-NEXT: movs r2, #0
|
||||
; THUMBV7-NEXT: movs r3, #0
|
||||
; THUMBV7-NEXT: strd r4, r7, [sp]
|
||||
; THUMBV7-NEXT: bl __multi3
|
||||
; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill
|
||||
; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
# RUN: llc -mtriple=i686-- -o - -run-pass=machine-scheduler -debug %s 2>&1 | FileCheck %s
|
||||
# REQUIRES: asserts
|
||||
|
||||
--- |
|
||||
%struct.Macroblock.0.1.2.3.6.17 = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock.0.1.2.3.6.17*, %struct.Macroblock.0.1.2.3.6.17*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
|
||||
|
||||
define void @stepsystem(i32 %x) {
|
||||
entry:
|
||||
%0 = load i32, i32* undef, align 8
|
||||
%inc = add i32 %x, 1
|
||||
store i32 %inc, i32* undef, align 8
|
||||
store <2 x double> <double 0xD47D42AEA2879F2E, double 0xD47D42AEA2879F2E>, <2 x double>* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @dct_chroma() {
|
||||
cond_true2732.preheader:
|
||||
%tmp2666 = getelementptr %struct.Macroblock.0.1.2.3.6.17, %struct.Macroblock.0.1.2.3.6.17* null, i32 0, i32 13
|
||||
%tmp2667.us.us = load i64, i64* %tmp2666, align 4
|
||||
%tmp2670.us.us = load i64, i64* null, align 4
|
||||
%tmp2675.us.us = shl i64 %tmp2670.us.us, 0
|
||||
%tmp2675not.us.us = xor i64 %tmp2675.us.us, -1
|
||||
%tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us
|
||||
store i64 %tmp2676.us.us, i64* %tmp2666, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
name: stepsystem
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers:
|
||||
- { id: 0, class: gr32, preferred-register: '' }
|
||||
- { id: 1, class: gr32, preferred-register: '' }
|
||||
- { id: 2, class: gr32, preferred-register: '' }
|
||||
- { id: 3, class: gr32, preferred-register: '' }
|
||||
- { id: 4, class: gr32, preferred-register: '' }
|
||||
liveins: []
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default,
|
||||
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
stack: []
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
|
||||
%1:gr32 = INC32r %1, implicit-def dead $eflags
|
||||
MOV32mr undef %2:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32* undef`, align 8)
|
||||
MOV32mi undef %3:gr32, 1, $noreg, 0, $noreg, -729988434 :: (store 4 into `<2 x double>* undef` + 12)
|
||||
MOV32mi undef %4:gr32, 1, $noreg, 0, $noreg, -1568170194 :: (store 4 into `<2 x double>* undef` + 8, align 8)
|
||||
RET 0
|
||||
|
||||
# CHECK-LABEL: stepsystem
|
||||
# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mi {{.*}} :: (store 4 {{.*}})
|
||||
# CHECK: Adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mr {{.*}} :: (store 4 {{.*}})
|
||||
...
|
||||
---
|
||||
name: dct_chroma
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers:
|
||||
- { id: 0, class: gr32, preferred-register: '' }
|
||||
- { id: 1, class: gr32, preferred-register: '' }
|
||||
- { id: 2, class: gr32, preferred-register: '' }
|
||||
- { id: 3, class: gr32, preferred-register: '' }
|
||||
- { id: 4, class: gr32, preferred-register: '' }
|
||||
liveins: []
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 1
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.cond_true2732.preheader:
|
||||
%4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i64* null`)
|
||||
%2:gr32 = MOV32rm $noreg, 1, $noreg, 4, $noreg :: (load 4 from `i64* null` + 4)
|
||||
%2:gr32 = NOT32r %2
|
||||
%4:gr32 = NOT32r %4
|
||||
%4:gr32 = AND32rm %4, $noreg, 1, $noreg, 356, $noreg, implicit-def dead $eflags :: (load 4 from %ir.tmp2666)
|
||||
AND32mr $noreg, 1, $noreg, 360, $noreg, %2, implicit-def dead $eflags :: (store 4 into %ir.tmp2666 + 4), (load 4 from %ir.tmp2666 + 4)
|
||||
MOV32mr $noreg, 1, $noreg, 356, $noreg, %4 :: (store 4 into %ir.tmp2666)
|
||||
RET 0
|
||||
|
||||
# Chain dependencies should not be systematically added when at least one of
|
||||
# the instructions has more than one memory operand. They should only be added
|
||||
# where they are needed.
|
||||
# CHECK-LABEL: dct_chroma
|
||||
# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mr {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}})
|
||||
# CHECK: Adding chain dependency{{[[:space:]]*}}from: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}}){{[[:space:]]*}}to: %{{.*}} = MOV32rm {{.*}} :: (load 4 {{.*}})
|
||||
|
|
@ -17,13 +17,12 @@ cond_true2732.preheader: ; preds = %entry
|
|||
store i64 %tmp2676.us.us, i64* %tmp2666
|
||||
ret i32 0
|
||||
|
||||
; INTEL: and {{e..}}, dword ptr [356]
|
||||
; INTEL: and dword ptr [360], {{e..}}
|
||||
; FIXME: mov dword ptr [356], {{e..}}
|
||||
; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
|
||||
; INTEL: and {{e..}}, dword ptr [356]
|
||||
; INTEL: mov dword ptr [356], {{e..}}
|
||||
|
||||
; ATT: andl 356, %{{e..}}
|
||||
; ATT: andl %{{e..}}, 360
|
||||
; ATT: andl 356, %{{e..}}
|
||||
; ATT: movl %{{e..}}, 356
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue