GlobalISel: Allow forming atomic/volatile G_ZEXTLOAD
SelectionDAG has a target hook, getExtendForAtomicOps, which it uses in the computeKnownBits implementation for ATOMIC_LOAD. This is pretty ugly (as is having a separate load opcode for atomics), so instead allow making use of atomic zextload. Enable this for AArch64, since the DAG path defaults to the zext behavior.

The TableGen changes are pretty ugly, but they partially help migrate SelectionDAG from using ISD::ATOMIC_LOAD to regular ISD::LOAD with atomic memory operands. For now the DAG emitter will emit matchers for patterns which the DAG will not produce.

I'm still a bit confused by the intent of the isLoad/isStore/isAtomic bits. The DAG implementation rejects trying to use any of these in combination. For now I've opted to make the isLoad checks also check isAtomic, although I think having both isLoad and isAtomic set on these makes the most sense.
parent 0d7161af89
commit 1ee6ce9bad
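For context, a minimal sketch of the hook-based special case the message refers to, reconstructed from memory rather than copied verbatim from SelectionDAG::computeKnownBits (variable names abbreviated):

case ISD::ATOMIC_LOAD: {
  // ATOMIC_LOAD is a separate opcode, so the regular LOAD handling never
  // sees it; a target hook has to report how the result is extended.
  unsigned MemBits =
      cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
  if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
    Known.Zero.setBitsFrom(MemBits); // high bits known zero (the AArch64 case)
  break;
}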
@@ -684,6 +684,10 @@ Only G_LOAD is valid if the result is a vector type. If the result is larger
than the memory size, the high elements are undefined (i.e. this is not a
per-element, vector anyextload)

Unlike in SelectionDAG, atomic loads are expressed with the same
opcodes as regular loads. G_LOAD, G_SEXTLOAD and G_ZEXTLOAD may all
have atomic memory operands.

G_INDEXED_LOAD
^^^^^^^^^^^^^^
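A minimal sketch of what this representation buys MIR consumers (an illustrative helper, not an existing API, assuming an instruction with a single memory operand): an atomic zero-extending load is just a G_ZEXTLOAD whose memory operand is atomic, so no separate opcode has to be handled.

bool isAtomicZExtLoad(const MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_ZEXTLOAD || !MI.hasOneMemOperand())
    return false;
  // The atomicity lives on the MachineMemOperand, not on the opcode.
  return (*MI.memoperands_begin())->isAtomic();
}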
@@ -184,6 +184,8 @@ def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; }
def : GINodeEquiv<G_LOAD, atomic_load> {
  let CheckMMOIsNonAtomic = false;
  let CheckMMOIsAtomic = true;
  let IfSignExtend = G_SEXTLOAD;
  let IfZeroExtend = G_ZEXTLOAD;
}

// Operands are swapped for atomic_store vs. regular store
@@ -1627,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
defm atomic_store : binary_atomic_op<atomic_store>;
defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>;

/// Atomic load which zeroes the excess high bits.
def atomic_load_zext :
  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsZeroExtLoad = true;
}

/// Atomic load which sign extends the excess high bits.
def atomic_load_sext :
  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsSignExtLoad = true;
}

def atomic_load_8 :
  PatFrag<(ops node:$ptr),
          (atomic_load node:$ptr)> {
  let IsAtomic = true;
  let MemoryVT = i8;
}

def atomic_load_16 :
  PatFrag<(ops node:$ptr),
          (atomic_load node:$ptr)> {
  let IsAtomic = true;
  let MemoryVT = i16;
}

def atomic_load_32 :
  PatFrag<(ops node:$ptr),
          (atomic_load node:$ptr)> {

@@ -1652,6 +1668,40 @@ def atomic_load_64 :
  let MemoryVT = i64;
}

def atomic_load_zext_8 :
  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let MemoryVT = i8;
}

def atomic_load_zext_16 :
  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let MemoryVT = i16;
}

def atomic_load_sext_8 :
  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let MemoryVT = i8;
}

def atomic_load_sext_16 :
  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let MemoryVT = i16;
}

// Atomic load which zeroes or anyextends the high bits.
def atomic_load_az_8 : PatFrags<(ops node:$op),
                                [(atomic_load_8 node:$op),
                                 (atomic_load_zext_8 node:$op)]>;

// Atomic load which zeroes or anyextends the high bits.
def atomic_load_az_16 : PatFrags<(ops node:$op),
                                 [(atomic_load_16 node:$op),
                                  (atomic_load_zext_16 node:$op)]>;

def nonext_masked_gather :
  PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
          (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
@@ -698,13 +698,13 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,

  Register SrcReg = MI.getOperand(1).getReg();
  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
      !LoadMI->isSimple())
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT LoadTy = MRI.getType(LoadReg);
  LLT RegTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  unsigned RegSize = RegTy.getSizeInBits();
  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countTrailingOnes();

@@ -715,7 +715,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,

  // If the mask covers the whole destination register, there's nothing to
  // extend
  if (MaskSizeBits >= LoadTy.getSizeInBits())
  if (MaskSizeBits >= RegSize)
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize to

@@ -725,17 +725,25 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);
  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);

  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadMI->isSimple())
    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
    return false;

  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
    LoadMI->eraseFromParent();
  };
  return true;
}
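A self-contained sketch of the decision the combine now makes (a hypothetical helper, names invented for illustration): simple loads may have their access narrowed to the mask width, while atomic/volatile loads must keep their width, so only the opcode is allowed to change.

#include <cstdint>
#include <optional>

// Returns the memory size (in bits) the resulting G_ZEXTLOAD should use,
// or std::nullopt when the fold must be rejected.
std::optional<uint64_t> planZextloadFold(bool IsSimple, uint64_t LoadSizeBits,
                                         uint64_t MaskSizeBits,
                                         uint64_t RegSizeBits) {
  // The mask already covers the whole register: nothing to fold.
  if (MaskSizeBits >= RegSizeBits)
    return std::nullopt;
  // Non-atomic, non-volatile: shrink the access to the masked width.
  if (IsSimple)
    return MaskSizeBits;
  // Atomic/volatile: the access width must stay the same. Only convert to
  // G_ZEXTLOAD when the mask covers all loaded bits and the load does not
  // already fill the destination register.
  if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSizeBits)
    return std::nullopt;
  return LoadSizeBits;
}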
@@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;

// An atomic load operation that does not need either acquire or release
// semantics.
class relaxed_load<PatFrag base>
class relaxed_load<PatFrags base>
  : PatFrag<(ops node:$ptr), (base node:$ptr)> {
  let IsAtomic = 1;
  let IsAtomicOrderingAcquireOrStronger = 0;
}

// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
class acquiring_load<PatFrags base>
  : PatFrag<(ops node:$ptr), (base node:$ptr)> {
  let IsAtomic = 1;
  let IsAtomicOrderingAcquire = 1;
}

// An atomic load operation that needs sequential consistency.
class seq_cst_load<PatFrag base>
class seq_cst_load<PatFrags base>
  : PatFrag<(ops node:$ptr), (base node:$ptr)> {
  let IsAtomic = 1;
  let IsAtomicOrderingSequentiallyConsistent = 1;

@@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in {
}

// 8-bit loads
def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend8:$offset)),
          (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend8:$offset)),
def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend8:$offset)),
          (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
                                        uimm12s1:$offset)),
def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn,
                                           uimm12s1:$offset)),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(relaxed_load<atomic_load_8>
def : Pat<(relaxed_load<atomic_load_az_8>
          (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bit loads
def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                            ro_Wextend16:$extend)),
          (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                         ro_Xextend16:$extend)),
def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                            ro_Xextend16:$extend)),
          (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
                                         uimm12s2:$offset)),
def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn,
                                            uimm12s2:$offset)),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(relaxed_load<atomic_load_16>
def : Pat<(relaxed_load<atomic_load_az_16>
          (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
@@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
      .maxScalarIf(typeInSet(1, {s128}), 0, s64);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Actions = getActionDefinitionsBuilder(Op);

    if (Op == G_SEXTLOAD)
      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));

    // Atomics have zero extending behavior.
    Actions
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {s32, p0, s16, 8},
                                   {s32, p0, s32, 8},

@@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
        .unsupportedIfMemSizeNotPow2()
        // Lower anything left over into G_*EXT and G_LOAD
        .lower();
  }

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
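A small sketch of how these rules get exercised (assuming a LegalizerInfo reference LI and the load's MachineMemOperand MMO are in scope): the legality query carries the atomic ordering via LegalityQuery::MemDesc, which is how the combiner shown earlier asks whether an atomic G_ZEXTLOAD is acceptable before forming one.

// Query whether a zero-extending s8/s16 -> s32 load with this MMO's
// ordering is legal on the current target.
LegalityQuery::MemDesc MemDesc(MMO); // memory type, alignment and ordering
bool ZExtLoadOK =
    LI.getAction({TargetOpcode::G_ZEXTLOAD,
                  {LLT::scalar(32), LLT::pointer(0, 64)},
                  {MemDesc}})
        .Action == LegalizeActions::Legal;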
@@ -161,8 +161,7 @@
# DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: G_ZEXTLOAD (opcode 80): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
@@ -88,12 +88,12 @@ body: |
...

---
name: test_load_s32_atomic
name: test_load_mask_s8_s32_atomic
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_load_s32_atomic
    ; CHECK-LABEL: name: test_load_mask_s8_s32_atomic
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0

@@ -108,6 +108,49 @@ body: |
    $w0 = COPY %3
...

# The mask is equal to the memory size.
---
name: test_load_mask_s16_s16_atomic
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_load_mask_s16_s16_atomic
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load seq_cst (s16))
    ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 65535
    %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

# The mask is smaller than the memory size which must be preserved, so
# there's little point to folding.
---
name: test_load_mask_s8_s16_atomic
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_load_mask_s8_s16_atomic
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load seq_cst (s16))
    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_load_mask_size_equals_dst_size
tracksRegLiveness: true

@@ -272,13 +315,32 @@ body: |
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8))
    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (volatile load (s8))
    ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (volatile load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_volatile_mask_smaller_mem
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_volatile_mask_smaller_mem
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s16))
    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (volatile load (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...
@@ -133,3 +133,163 @@ body: |
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s8_atomic_unordered
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_unordered
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load unordered (s8))
    ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s8))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s8_atomic_monotonic
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_monotonic
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load monotonic (s8))
    ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s8))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s8_atomic_acquire
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_acquire
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8))
    ; CHECK-NEXT: $w0 = COPY [[LDARB]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s8))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s8_atomic_seq_cst
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_seq_cst
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load seq_cst (s8))
    ; CHECK-NEXT: $w0 = COPY [[LDARB]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s8))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s16_atomic_unordered
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_unordered
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load unordered (s16))
    ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s16))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s16_atomic_monotonic
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_monotonic
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load monotonic (s16))
    ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s16))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s16_atomic_acquire
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_acquire
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load acquire (s16))
    ; CHECK-NEXT: $w0 = COPY [[LDARH]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s16))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...

---
name: zextload_s32_from_s16_atomic_seq_cst
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $x0

    ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_seq_cst
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load seq_cst (s16))
    ; CHECK-NEXT: $w0 = COPY [[LDARH]]
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:gpr(p0) = COPY $x0
    %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s16))
    $w0 = COPY %2
    RET_ReallyLR implicit $w0

...
@@ -141,10 +141,8 @@ body: |
    ; CHECK: liveins: $vgpr0_vgpr1
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
    ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 255
    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k
    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
    %k:_(s32) = G_CONSTANT i32 255

@@ -183,10 +181,8 @@ body: |
    ; CHECK: liveins: $vgpr0_vgpr1
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
    ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 65535
    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k
    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)
    %k:_(s32) = G_CONSTANT i32 65535
@@ -977,12 +977,15 @@ std::string TreePredicateFn::getPredCode() const {
    if (isAnyExtLoad())
      PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
                      "IsAnyExtLoad requires IsLoad");
    if (isSignExtLoad())
      PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
                      "IsSignExtLoad requires IsLoad");
    if (isZeroExtLoad())
      PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
                      "IsZeroExtLoad requires IsLoad");

    if (!isAtomic()) {
      if (isSignExtLoad())
        PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
                        "IsSignExtLoad requires IsLoad or IsAtomic");
      if (isZeroExtLoad())
        PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
                        "IsZeroExtLoad requires IsLoad or IsAtomic");
    }
  }

  if (isStore()) {

@@ -1003,8 +1006,9 @@ std::string TreePredicateFn::getPredCode() const {
  if (isAtomic()) {
    if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() &&
        getAddressSpaces() == nullptr &&
        !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() &&
        !isAtomicOrderingAcquireRelease() &&
        // FIXME: Should atomic loads be IsLoad, IsAtomic, or both?
        !isZeroExtLoad() && !isSignExtLoad() && !isAtomicOrderingAcquire() &&
        !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() &&
        !isAtomicOrderingSequentiallyConsistent() &&
        !isAtomicOrderingAcquireOrStronger() &&
        !isAtomicOrderingReleaseOrStronger() &&

@@ -1105,6 +1109,10 @@ std::string TreePredicateFn::getPredCode() const {
  Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getMergedOrdering())) "
          "return false;\n";

  // TODO: Handle atomic sextload/zextload normally when ATOMIC_LOAD is removed.
  if (isAtomic() && (isZeroExtLoad() || isSignExtLoad()))
    Code += "return false;\n";

  if (isLoad() || isStore()) {
    StringRef SDNodeName = isLoad() ? "LoadSDNode" : "StoreSDNode";
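A hand-written illustration of the net effect of the emitted "return false;" above (this is not actual TableGen output): a SelectionDAG predicate for an atomic zext/sext load fragment such as atomic_load_zext_8 can never match today, because the DAG still models atomics as ISD::ATOMIC_LOAD with no extension kind; only the GlobalISel importer makes use of these fragments.

bool matchAtomicZExtLoadFragment(SDNode *N) {
  (void)N;      // the DAG never produces a zext-flavored atomic load,
  return false; // so the emitted matcher unconditionally rejects
}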
@@ -3785,10 +3785,12 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {

  for (const TreePredicateCall &Call : N->getPredicateCalls()) {
    const TreePredicateFn &Predicate = Call.Fn;
    if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
    if (!Equiv.isValueUnset("IfSignExtend") &&
        (Predicate.isLoad() || Predicate.isAtomic()) &&
        Predicate.isSignExtLoad())
      return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
    if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() &&
    if (!Equiv.isValueUnset("IfZeroExtend") &&
        (Predicate.isLoad() || Predicate.isAtomic()) &&
        Predicate.isZeroExtLoad())
      return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
  }
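In summary, simplified pseudo-logic of the function above (not a real emitter API): when the importer maps a DAG pattern rooted at an atomic load PatFrag to a GlobalISel opcode, the extension flags select the instruction named by the GINodeEquiv's IfSignExtend/IfZeroExtend fields.

unsigned pickLoadOpcode(bool IsLoadOrAtomic, bool SExt, bool ZExt) {
  if (IsLoadOrAtomic && SExt)
    return TargetOpcode::G_SEXTLOAD; // Equiv record's IfSignExtend
  if (IsLoadOrAtomic && ZExt)
    return TargetOpcode::G_ZEXTLOAD; // Equiv record's IfZeroExtend
  return TargetOpcode::G_LOAD;       // default GINodeEquiv instruction
}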