forked from OSchip/llvm-project
[MachineInstr] Add support for instructions with multiple memory operands.
- Iterate over each pair of memory operands from both instructions and return true if any pair may alias. - The exception is memory instructions without any memory operands: they may touch anything and therefore may alias with any other memory instruction. Differential Revision: https://reviews.llvm.org/D89447
This commit is contained in:
parent
cb9d0e8819
commit
4b11201592
|
@ -1737,6 +1737,21 @@ public:
|
|||
return 5;
|
||||
}
|
||||
|
||||
/// Return the maximal number of alias checks on memory operands. For
|
||||
/// instructions with more than one memory operand, the alias check on a
|
||||
/// single MachineInstr pair has quadratic overhead and results in
|
||||
/// unacceptable performance in the worst case. The limit here is to clamp
|
||||
/// the maximal number of checks performed. Usually, that's the product of memory
|
||||
/// operand counts of the pair of MachineInstrs being checked. For
|
||||
/// instance, with two MachineInstrs with 4 and 5 memory operands
|
||||
/// respectively, a total of 20 checks are required. With this limit set to
|
||||
/// 16, their alias check is skipped and they are conservatively assumed to
/// alias. We choose to limit the product instead
|
||||
/// of the individual instruction as targets may have special MachineInstrs
|
||||
/// with a considerably high number of memory operands, such as `ldm` in ARM.
|
||||
/// Setting this limit per MachineInstr would result in either too high
|
||||
/// overhead or too rigid restriction.
|
||||
virtual unsigned getMemOperandAACheckLimit() const {
  // Default cap on the number of pairwise memory-operand alias checks
  // performed for a single pair of MachineInstrs.
  return 16;
}
|
||||
|
||||
/// Return an array that contains the ids of the target indices (used for the
|
||||
/// TargetIndex machine operand) and their names.
|
||||
///
|
||||
|
|
|
@ -1276,13 +1276,18 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
|
|||
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
|
||||
return false;
|
||||
|
||||
// FIXME: Need to handle multiple memory operands to support all targets.
|
||||
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
|
||||
// Memory operations without memory operands may access anything. Be
|
||||
// conservative and assume `MayAlias`.
|
||||
if (memoperands_empty() || Other.memoperands_empty())
|
||||
return true;
|
||||
|
||||
MachineMemOperand *MMOa = *memoperands_begin();
|
||||
MachineMemOperand *MMOb = *Other.memoperands_begin();
|
||||
// Skip if there are too many memory operands.
|
||||
auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
|
||||
if (NumChecks > TII->getMemOperandAACheckLimit())
|
||||
return true;
|
||||
|
||||
auto HasAlias = [MFI, AA, UseTBAA](const MachineMemOperand *MMOa,
|
||||
const MachineMemOperand *MMOb) {
|
||||
// The following interface to AA is fashioned after DAGCombiner::isAlias
|
||||
// and operates with MachineMemOperand offset with some important
|
||||
// assumptions:
|
||||
|
@ -1344,13 +1349,23 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
|
|||
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
|
||||
: MemoryLocation::UnknownSize;
|
||||
|
||||
AliasResult AAResult = AA->alias(
|
||||
MemoryLocation(ValA, OverlapA,
|
||||
AliasResult AAResult =
|
||||
AA->alias(MemoryLocation(ValA, OverlapA,
|
||||
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
|
||||
MemoryLocation(ValB, OverlapB,
|
||||
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
|
||||
|
||||
return (AAResult != NoAlias);
|
||||
};
|
||||
|
||||
// Check each pair of memory operands from both instructions, which can't
|
||||
// alias only if all pairs won't alias.
|
||||
for (auto *MMOa : memoperands())
|
||||
for (auto *MMOb : Other.memoperands())
|
||||
if (HasAlias(MMOa, MMOb))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
|
||||
|
|
|
@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
|
|||
; A53-NEXT: mov x19, x8
|
||||
; A53-NEXT: mov w0, w1
|
||||
; A53-NEXT: mov w9, #256
|
||||
; A53-NEXT: stp x2, x3, [x8, #32]
|
||||
; A53-NEXT: mov x2, x8
|
||||
; A53-NEXT: str q0, [x19, #16]!
|
||||
; A53-NEXT: str w1, [x19]
|
||||
; A53-NEXT: mov w1, #4
|
||||
; A53-NEXT: stp x2, x3, [x8, #32]
|
||||
; A53-NEXT: mov x2, x8
|
||||
; A53-NEXT: str q0, [x8]
|
||||
; A53-NEXT: strh w9, [x8, #24]
|
||||
; A53-NEXT: str wzr, [x8, #20]
|
||||
|
|
|
@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
|
|||
; CHECK-NEXT: vmov.32 r3, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r1, d16[0]
|
||||
; CHECK-NEXT: subs r12, r12, #1
|
||||
; CHECK-NEXT: str r12, [r0, #12]
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: str r2, [r0, #8]
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: sbc r1, r1, #0
|
||||
; CHECK-NEXT: stm r0, {r1, r3}
|
||||
; CHECK-NEXT: str r2, [r0, #8]
|
||||
; CHECK-NEXT: str r12, [r0, #12]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
|
|
|
@ -1094,6 +1094,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: ldrd r11, r8, [r12, #24]
|
||||
; CHECK-NEXT: vstrb.8 q0, [r9], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r5], #32
|
||||
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #-28]
|
||||
; CHECK-NEXT: vmul.f32 q0, q0, r7
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r5, #-24]
|
||||
|
@ -1105,13 +1106,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: vfma.f32 q0, q4, r6
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r5, #-8]
|
||||
; CHECK-NEXT: vfma.f32 q0, q5, r3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, lr
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, lr
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
|
||||
; CHECK-NEXT: vfma.f32 q0, q3, r11
|
||||
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r8
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r8
|
||||
; CHECK-NEXT: blo .LBB16_7
|
||||
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
|
|
|
@ -168,16 +168,14 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov q1, q4
|
||||
; CHECK-NEXT: vmov s1, r7
|
||||
; CHECK-NEXT: vmov.32 q1[1], r6
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vmov.32 q5[0], r7
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vmov s9, r4
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: strd r0, r10, [sp, #24]
|
||||
; CHECK-NEXT: vdup.32 q6, r7
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
|
||||
; CHECK-NEXT: vmov q1, q5
|
||||
; CHECK-NEXT: vmov s9, r4
|
||||
; CHECK-NEXT: vmov.32 q1[1], r7
|
||||
; CHECK-NEXT: vdup.32 q6, r7
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov.f32 s8, s0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r6
|
||||
|
@ -185,6 +183,7 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov q7, q6
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: mov.w r8, #4
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmov.32 q3[0], r4
|
||||
; CHECK-NEXT: vmov.32 q7[1], r4
|
||||
|
@ -192,6 +191,7 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: movs r1, #64
|
||||
; CHECK-NEXT: strh.w r8, [sp, #390]
|
||||
; CHECK-NEXT: strd r0, r10, [sp, #24]
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
|
||||
; CHECK-NEXT: str r0, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0]
|
||||
|
|
|
@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s9, s6
|
||||
; CHECK-NEXT: vmov.f32 s10, s0
|
||||
; CHECK-NEXT: vmov.f32 s11, s5
|
||||
; CHECK-NEXT: strd r2, r0, [r1, #16]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r1]
|
||||
; CHECK-NEXT: strd r2, r0, [r1, #16]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0
|
||||
|
|
|
@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
|
|||
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; THUMBV7-NEXT: .pad #44
|
||||
; THUMBV7-NEXT: sub sp, #44
|
||||
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
|
||||
; THUMBV7-NEXT: mov r5, r3
|
||||
; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
|
||||
; THUMBV7-NEXT: movs r0, #0
|
||||
; THUMBV7-NEXT: strd r4, r7, [sp]
|
||||
; THUMBV7-NEXT: mov r1, r3
|
||||
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
|
||||
; THUMBV7-NEXT: mov r5, r3
|
||||
; THUMBV7-NEXT: strd r0, r0, [sp, #8]
|
||||
; THUMBV7-NEXT: mov r1, r3
|
||||
; THUMBV7-NEXT: mov r6, r2
|
||||
; THUMBV7-NEXT: mov r0, r2
|
||||
; THUMBV7-NEXT: movs r2, #0
|
||||
; THUMBV7-NEXT: movs r3, #0
|
||||
; THUMBV7-NEXT: strd r4, r7, [sp]
|
||||
; THUMBV7-NEXT: bl __multi3
|
||||
; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill
|
||||
; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill
|
||||
|
|
|
@ -17,13 +17,12 @@ cond_true2732.preheader: ; preds = %entry
|
|||
store i64 %tmp2676.us.us, i64* %tmp2666
|
||||
ret i32 0
|
||||
|
||||
; INTEL: and {{e..}}, dword ptr [356]
|
||||
; INTEL: and dword ptr [360], {{e..}}
|
||||
; FIXME: mov dword ptr [356], {{e..}}
|
||||
; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
|
||||
; INTEL-DAG: and {{e..}}, dword ptr [356]
|
||||
; INTEL-DAG: and dword ptr [360], {{e..}}
|
||||
; INTEL: mov dword ptr [356], {{e..}}
|
||||
|
||||
; ATT: andl 356, %{{e..}}
|
||||
; ATT: andl %{{e..}}, 360
|
||||
; ATT-DAG: andl 356, %{{e..}}
|
||||
; ATT-DAG: andl %{{e..}}, 360
|
||||
; ATT: movl %{{e..}}, 356
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue