[MachineInstr] Add support for instructions with multiple memory operands.

- Iterate over each pair of memory operands from the two instructions and
  return true if any pair may alias.
- The exception is memory instructions without any memory operands: they
  may touch anything and could alias with any memory instruction.

Differential Revision: https://reviews.llvm.org/D89447
This commit is contained in:
Michael Liao 2020-10-12 10:01:40 -04:00
parent cb9d0e8819
commit 4b11201592
9 changed files with 114 additions and 85 deletions

View File

@ -1737,6 +1737,21 @@ public:
return 5;
}
/// Return the maximum number of alias checks performed on memory operands
/// for a single pair of MachineInstrs. For instructions with more than one
/// memory operand, the alias check on a single MachineInstr pair has
/// quadratic overhead and results in unacceptable performance in the worst
/// case. This limit clamps the number of checks performed, which is usually
/// the product of the memory operand counts of the two MachineInstrs being
/// checked. For instance, two MachineInstrs with 4 and 5 memory operands
/// respectively require a total of 20 checks; with this limit set to 16,
/// their alias check is skipped (MachineInstr::mayAlias then conservatively
/// reports that the pair may alias). We choose to limit the product instead
/// of the per-instruction count because targets may have special
/// MachineInstrs with a considerably high number of memory operands, such as
/// `ldm` in ARM. Setting this limit per MachineInstr would result in either
/// too high an overhead or too rigid a restriction.
///
/// The default limit is 16.
virtual unsigned getMemOperandAACheckLimit() const { return 16; }
/// Return an array that contains the ids of the target indices (used for the
/// TargetIndex machine operand) and their names.
///

View File

@ -1276,81 +1276,96 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
// FIXME: Need to handle multiple memory operands to support all targets.
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
// Memory operations without memory operands may access anything. Be
// conservative and assume `MayAlias`.
if (memoperands_empty() || Other.memoperands_empty())
return true;
MachineMemOperand *MMOa = *memoperands_begin();
MachineMemOperand *MMOb = *Other.memoperands_begin();
// Skip if there are too many memory operands.
auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
if (NumChecks > TII->getMemOperandAACheckLimit())
return true;
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
// - LLVM fundamentally assumes flat address spaces.
// - MachineOperand offset can *only* result from legalization and
// cannot affect queries other than the trivial case of overlap
// checking.
// - These offsets never wrap and never step outside
// of allocated objects.
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
// Even before we go to AA we can reason locally about some
// memory objects. It can save compile time, and possibly catch some
// corner cases not currently covered.
auto HasAlias = [MFI, AA, UseTBAA](const MachineMemOperand *MMOa,
const MachineMemOperand *MMOb) {
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
// - LLVM fundamentally assumes flat address spaces.
// - MachineOperand offset can *only* result from legalization and
// cannot affect queries other than the trivial case of overlap
// checking.
// - These offsets never wrap and never step outside
// of allocated objects.
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
// Even before we go to AA we can reason locally about some
// memory objects. It can save compile time, and possibly catch some
// corner cases not currently covered.
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
int64_t MinOffset = std::min(OffsetA, OffsetB);
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
int64_t MinOffset = std::min(OffsetA, OffsetB);
uint64_t WidthA = MMOa->getSize();
uint64_t WidthB = MMOb->getSize();
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
uint64_t WidthA = MMOa->getSize();
uint64_t WidthB = MMOb->getSize();
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
bool SameVal = (ValA && ValB && (ValA == ValB));
if (!SameVal) {
const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
return false;
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
return false;
if (PSVa && PSVb && (PSVa == PSVb))
SameVal = true;
}
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
bool SameVal = (ValA && ValB && (ValA == ValB));
if (!SameVal) {
const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
return false;
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
return false;
if (PSVa && PSVb && (PSVa == PSVb))
SameVal = true;
}
if (SameVal) {
if (!KnownWidthA || !KnownWidthB)
if (SameVal) {
if (!KnownWidthA || !KnownWidthB)
return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
return (MinOffset + LowWidth > MaxOffset);
}
if (!AA)
return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
return (MinOffset + LowWidth > MaxOffset);
}
if (!AA)
return true;
if (!ValA || !ValB)
return true;
if (!ValA || !ValB)
return true;
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
: MemoryLocation::UnknownSize;
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
: MemoryLocation::UnknownSize;
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
: MemoryLocation::UnknownSize;
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
: MemoryLocation::UnknownSize;
AliasResult AAResult =
AA->alias(MemoryLocation(ValA, OverlapA,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
AliasResult AAResult = AA->alias(
MemoryLocation(ValA, OverlapA,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
};
return (AAResult != NoAlias);
// Check each pair of memory operands from both instructions. The two
// instructions are known not to alias only if no pair may alias.
for (auto *MMOa : memoperands())
for (auto *MMOb : Other.memoperands())
if (HasAlias(MMOa, MMOb))
return true;
return false;
}
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered

View File

@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
; A53-NEXT: mov x19, x8
; A53-NEXT: mov w0, w1
; A53-NEXT: mov w9, #256
; A53-NEXT: stp x2, x3, [x8, #32]
; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x19, #16]!
; A53-NEXT: str w1, [x19]
; A53-NEXT: mov w1, #4
; A53-NEXT: stp x2, x3, [x8, #32]
; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x8]
; A53-NEXT: strh w9, [x8, #24]
; A53-NEXT: str wzr, [x8, #20]

View File

@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
; CHECK-NEXT: vmov.32 r3, d16[1]
; CHECK-NEXT: vmov.32 r1, d16[0]
; CHECK-NEXT: subs r12, r12, #1
; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: sbcs r2, r2, #0
; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: sbcs r3, r3, #0
; CHECK-NEXT: sbc r1, r1, #0
; CHECK-NEXT: stm r0, {r1, r3}
; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:

View File

@ -1094,6 +1094,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: ldrd r11, r8, [r12, #24]
; CHECK-NEXT: vstrb.8 q0, [r9], #16
; CHECK-NEXT: vldrw.u32 q0, [r5], #32
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vldrw.u32 q1, [r5, #-28]
; CHECK-NEXT: vmul.f32 q0, q0, r7
; CHECK-NEXT: vldrw.u32 q6, [r5, #-24]
@ -1105,13 +1106,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: vfma.f32 q0, q4, r6
; CHECK-NEXT: vldrw.u32 q3, [r5, #-8]
; CHECK-NEXT: vfma.f32 q0, q5, r3
; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
; CHECK-NEXT: vfma.f32 q0, q3, r11
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: blo .LBB16_7
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1

View File

@ -168,16 +168,14 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q1, q4
; CHECK-NEXT: vmov s1, r7
; CHECK-NEXT: vmov.32 q1[1], r6
; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: vmov.32 q1[2], r5
; CHECK-NEXT: vmov.32 q5[0], r7
; CHECK-NEXT: vmov.32 q1[2], r5
; CHECK-NEXT: vmov s9, r4
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: strd r0, r10, [sp, #24]
; CHECK-NEXT: vdup.32 q6, r7
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
; CHECK-NEXT: vmov q1, q5
; CHECK-NEXT: vmov s9, r4
; CHECK-NEXT: vmov.32 q1[1], r7
; CHECK-NEXT: vdup.32 q6, r7
; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vmov.f32 s8, s0
; CHECK-NEXT: vmov.32 q1[2], r6
@ -185,6 +183,7 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q7, q6
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: mov.w r8, #4
; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: vmov.32 q3[0], r4
; CHECK-NEXT: vmov.32 q7[1], r4
@ -192,6 +191,7 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: movs r1, #64
; CHECK-NEXT: strh.w r8, [sp, #390]
; CHECK-NEXT: strd r0, r10, [sp, #24]
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
; CHECK-NEXT: str r0, [r0]
; CHECK-NEXT: vstrw.32 q2, [r0]

View File

@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK-NEXT: vmov.f32 s9, s6
; CHECK-NEXT: vmov.f32 s10, s0
; CHECK-NEXT: vmov.f32 s11, s5
; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: vstrw.32 q2, [r1]
; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0

View File

@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: .pad #44
; THUMBV7-NEXT: sub sp, #44
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
; THUMBV7-NEXT: mov r5, r3
; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
; THUMBV7-NEXT: movs r0, #0
; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: mov r1, r3
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
; THUMBV7-NEXT: mov r5, r3
; THUMBV7-NEXT: strd r0, r0, [sp, #8]
; THUMBV7-NEXT: mov r1, r3
; THUMBV7-NEXT: mov r6, r2
; THUMBV7-NEXT: mov r0, r2
; THUMBV7-NEXT: movs r2, #0
; THUMBV7-NEXT: movs r3, #0
; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: bl __multi3
; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill
; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill

View File

@ -17,13 +17,12 @@ cond_true2732.preheader: ; preds = %entry
store i64 %tmp2676.us.us, i64* %tmp2666
ret i32 0
; INTEL: and {{e..}}, dword ptr [356]
; INTEL: and dword ptr [360], {{e..}}
; FIXME: mov dword ptr [356], {{e..}}
; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
; INTEL-DAG: and {{e..}}, dword ptr [356]
; INTEL-DAG: and dword ptr [360], {{e..}}
; INTEL: mov dword ptr [356], {{e..}}
; ATT: andl 356, %{{e..}}
; ATT: andl %{{e..}}, 360
; ATT-DAG: andl 356, %{{e..}}
; ATT-DAG: andl %{{e..}}, 360
; ATT: movl %{{e..}}, 356
}