GlobalISel: Allow forming atomic/volatile G_ZEXTLOAD

SelectionDAG has a target hook, getExtendForAtomicOps, which it uses
in the computeKnownBits implementation for ATOMIC_LOAD. This is pretty
ugly (as is having a separate load opcode for atomics), so instead
allow making use of atomic zextload. Enable this for AArch64 since the
DAG path defaults to the zext behavior.
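
To make the known-bits connection concrete: only a zero-extending load lets an analysis treat the high bits of the result as known zero, which is what allows a following mask to be dropped. A minimal standalone sketch of that reasoning (plain C++, illustrative only, not LLVM's computeKnownBits):

// Standalone sketch, not LLVM code: models which high bits of a 32-bit
// result are known zero after loading MemBits bits with a given extension.
#include <cstdint>
#include <cstdio>

enum class ExtKind { Any, Sign, Zero };

uint32_t knownZeroMask(unsigned MemBits, ExtKind Ext) {
  if (Ext != ExtKind::Zero || MemBits >= 32)
    return 0; // any-/sign-extension (or a full-width load): no known zeros
  return ~uint32_t(0) << MemBits; // zext: every bit above MemBits is zero
}

int main() {
  // An 8-bit atomic load zero-extended to 32 bits: bits 8..31 are known
  // zero, so a following "and w0, w0, #0xff" is redundant.
  std::printf("zext  : 0x%08x\n", knownZeroMask(8, ExtKind::Zero));
  // With only any-extend semantics nothing is known about the high bits.
  std::printf("anyext: 0x%08x\n", knownZeroMask(8, ExtKind::Any));
}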

The tablegen changes are pretty ugly, but they partially help migrate
SelectionDAG from using ISD::ATOMIC_LOAD to regular ISD::LOAD with
atomic memory operands. For now the DAG emitter will emit matchers for
patterns which the DAG will not produce.

I'm still a bit confused by the intent of the isLoad/isStore/isAtomic
bits. The DAG implementation rejects trying to use any of these in
combination. For now I've opted to make the isLoad checks also check
isAtomic, although I think having isLoad and isAtomic set on these
makes the most sense.

Matt Arsenault 2022-04-09 14:06:04 -04:00
parent 0d7161af89
commit 1ee6ce9bad
12 changed files with 353 additions and 54 deletions

View File

@@ -684,6 +684,10 @@ Only G_LOAD is valid if the result is a vector type. If the result is larger
than the memory size, the high elements are undefined (i.e. this is not a
per-element, vector anyextload)
Unlike in SelectionDAG, atomic loads are expressed with the same
opcodes as regular loads. G_LOAD, G_SEXTLOAD and G_ZEXTLOAD may all
have atomic memory operands.
G_INDEXED_LOAD
^^^^^^^^^^^^^^

View File

@@ -184,6 +184,8 @@ def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; }
def : GINodeEquiv<G_LOAD, atomic_load> {
let CheckMMOIsNonAtomic = false;
let CheckMMOIsAtomic = true;
let IfSignExtend = G_SEXTLOAD;
let IfZeroExtend = G_ZEXTLOAD;
}
// Operands are swapped for atomic_store vs. regular store

View File

@@ -1627,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
defm atomic_store : binary_atomic_op<atomic_store>;
defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>;
/// Atomic load which zeroes the excess high bits.
def atomic_load_zext :
PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let IsZeroExtLoad = true;
}
/// Atomic load which sign extends the excess high bits.
def atomic_load_sext :
PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let IsSignExtLoad = true;
}
def atomic_load_8 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
let IsAtomic = true;
let MemoryVT = i8;
}
def atomic_load_16 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
let IsAtomic = true;
let MemoryVT = i16;
}
def atomic_load_32 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
@@ -1652,6 +1668,40 @@ def atomic_load_64 :
let MemoryVT = i64;
}
def atomic_load_zext_8 :
PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let MemoryVT = i8;
}
def atomic_load_zext_16 :
PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let MemoryVT = i16;
}
def atomic_load_sext_8 :
PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let MemoryVT = i8;
}
def atomic_load_sext_16 :
PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
let MemoryVT = i16;
}
// Atomic load which zeroes or anyextends the high bits.
def atomic_load_az_8 : PatFrags<(ops node:$op),
[(atomic_load_8 node:$op),
(atomic_load_zext_8 node:$op)]>;
// Atomic load which zeroes or anyextends the high bits.
def atomic_load_az_16 : PatFrags<(ops node:$op),
[(atomic_load_16 node:$op),
(atomic_load_zext_16 node:$op)]>;
def nonext_masked_gather :
PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
(masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{

View File

@@ -698,13 +698,13 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
Register SrcReg = MI.getOperand(1).getReg();
GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
!LoadMI->isSimple())
if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
return false;
Register LoadReg = LoadMI->getDstReg();
LLT LoadTy = MRI.getType(LoadReg);
LLT RegTy = MRI.getType(LoadReg);
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
unsigned MaskSizeBits = MaskVal.countTrailingOnes();
@@ -715,7 +715,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
// If the mask covers the whole destination register, there's nothing to
// extend
if (MaskSizeBits >= LoadTy.getSizeInBits())
if (MaskSizeBits >= RegSize)
return false;
// Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +725,25 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
const MachineMemOperand &MMO = LoadMI->getMMO();
LegalityQuery::MemDesc MemDesc(MMO);
MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
// Don't modify the memory access size if this is atomic/volatile, but we can
// still adjust the opcode to indicate the high bit behavior.
if (LoadMI->isSimple())
MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
return false;
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
{TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.setInstrAndDebugLoc(*LoadMI);
auto &MF = B.getMF();
auto PtrInfo = MMO.getPointerInfo();
auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
LoadMI->eraseFromParent();
};
return true;
}
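
The guard above boils down to the following rule, restated as a standalone sketch (not the CombinerHelper API; the legality query and the sub-byte-size restriction are omitted): a simple load can have its memory access narrowed to the mask width, while an atomic/volatile access must keep its size, so only the opcode may change, and only when the mask already covers every bit the load produces.

// Standalone restatement of the guard, not LLVM code.
#include <cstdint>
#include <optional>

// Returns the memory size (in bits) the resulting G_ZEXTLOAD should use,
// or nullopt if the load+and fold should be skipped.
std::optional<uint64_t> zextLoadMemSizeBits(bool IsSimple,
                                            uint64_t LoadSizeBits,
                                            uint64_t MaskSizeBits,
                                            uint64_t RegSizeBits) {
  if (MaskSizeBits >= RegSizeBits)
    return std::nullopt; // mask covers the whole register; nothing to extend
  if (IsSimple)
    return MaskSizeBits; // plain load: shrink the memory access to the mask
  // Atomic/volatile: keep the original access size. The fold only helps if
  // the mask keeps all loaded bits and the load doesn't fill the register.
  if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSizeBits)
    return std::nullopt;
  return LoadSizeBits;
}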

View File

@@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
// An atomic load operation that does not need either acquire or release
// semantics.
class relaxed_load<PatFrag base>
class relaxed_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingAcquireOrStronger = 0;
}
// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
class acquiring_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingAcquire = 1;
}
// An atomic load operation that needs sequential consistency.
class seq_cst_load<PatFrag base>
class seq_cst_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingSequentiallyConsistent = 1;
@@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in {
}
// 8-bit loads
def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend8:$offset)),
(LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend8:$offset)),
def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend8:$offset)),
(LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)),
def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(relaxed_load<atomic_load_8>
def : Pat<(relaxed_load<atomic_load_az_8>
(am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
// 16-bit loads
def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
(LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)),
def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)),
(LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(relaxed_load<atomic_load_16>
def : Pat<(relaxed_load<atomic_load_az_16>
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;

View File

@@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
.maxScalarIf(typeInSet(1, {s128}), 0, s64);
getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
auto &Actions = getActionDefinitionsBuilder(Op);
if (Op == G_SEXTLOAD)
Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
// Atomics have zero extending behavior.
Actions
.legalForTypesWithMemDesc({{s32, p0, s8, 8},
{s32, p0, s16, 8},
{s32, p0, s32, 8},
@@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.unsupportedIfMemSizeNotPow2()
// Lower anything left over into G_*EXT and G_LOAD
.lower();
}
auto IsPtrVecPred = [=](const LegalityQuery &Query) {
const LLT &ValTy = Query.Types[0];
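
The net effect of this hunk, restated as a rough rule (a sketch, not the LegalizerInfo API): sign-extending atomic loads are lowered to an atomic G_LOAD plus G_SEXT, while zero-extending atomic loads remain legal because AArch64's narrow relaxed/acquire loads (LDRBBui, LDRHHui, LDARB, LDARH) already zero the upper bits of the destination register.

// Sketch of the rule only; real legalization also checks types and sizes.
enum class Action { Legal, Lower };

Action atomicExtLoadAction(bool IsSignExtending) {
  // Zero-extending atomic loads map directly onto AArch64's zero-extending
  // narrow loads; sign-extending ones are split into a load plus G_SEXT.
  return IsSignExtending ? Action::Lower : Action::Legal;
}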

View File

@@ -161,8 +161,7 @@
# DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: G_ZEXTLOAD (opcode 80): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices

View File

@@ -88,12 +88,12 @@ body: |
...
---
name: test_load_s32_atomic
name: test_load_mask_s8_s32_atomic
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: test_load_s32_atomic
; CHECK-LABEL: name: test_load_mask_s8_s32_atomic
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
@@ -108,6 +108,49 @@ body: |
$w0 = COPY %3
...
# The mask is equal to the memory size.
---
name: test_load_mask_s16_s16_atomic
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: test_load_mask_s16_s16_atomic
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load seq_cst (s16))
; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 65535
%2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
%3:_(s32) = G_AND %2, %1
$w0 = COPY %3
...
# The mask is smaller than the memory size, which must be preserved, so
# there's little point in folding.
---
name: test_load_mask_s8_s16_atomic
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: test_load_mask_s8_s16_atomic
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load seq_cst (s16))
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
%3:_(s32) = G_AND %2, %1
$w0 = COPY %3
...
---
name: test_load_mask_size_equals_dst_size
tracksRegLiveness: true
@@ -272,13 +315,32 @@ body: |
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8))
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (volatile load (s8))
; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_LOAD %0 :: (volatile load (s8))
%3:_(s32) = G_AND %2, %1
$w0 = COPY %3
...
---
name: test_volatile_mask_smaller_mem
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: test_volatile_mask_smaller_mem
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s16))
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_LOAD %0 :: (volatile load (s16))
%3:_(s32) = G_AND %2, %1
$w0 = COPY %3
...

View File

@@ -133,3 +133,163 @@ body: |
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s8_atomic_unordered
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s8_atomic_unordered
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load unordered (s8))
; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s8))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s8_atomic_monotonic
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s8_atomic_monotonic
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load monotonic (s8))
; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s8))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s8_atomic_acquire
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s8_atomic_acquire
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8))
; CHECK-NEXT: $w0 = COPY [[LDARB]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s8))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s8_atomic_seq_cst
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s8_atomic_seq_cst
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load seq_cst (s8))
; CHECK-NEXT: $w0 = COPY [[LDARB]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s8))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s16_atomic_unordered
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s16_atomic_unordered
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load unordered (s16))
; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s16))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s16_atomic_monotonic
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s16_atomic_monotonic
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load monotonic (s16))
; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s16))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s16_atomic_acquire
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s16_atomic_acquire
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load acquire (s16))
; CHECK-NEXT: $w0 = COPY [[LDARH]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s16))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...
---
name: zextload_s32_from_s16_atomic_seq_cst
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: zextload_s32_from_s16_atomic_seq_cst
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load seq_cst (s16))
; CHECK-NEXT: $w0 = COPY [[LDARH]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(p0) = COPY $x0
%2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s16))
$w0 = COPY %2
RET_ReallyLR implicit $w0
...

View File

@@ -141,10 +141,8 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
%k:_(s32) = G_CONSTANT i32 255
@@ -183,10 +181,8 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)
%k:_(s32) = G_CONSTANT i32 65535

View File

@@ -977,12 +977,15 @@ std::string TreePredicateFn::getPredCode() const {
if (isAnyExtLoad())
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsAnyExtLoad requires IsLoad");
if (isSignExtLoad())
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsSignExtLoad requires IsLoad");
if (isZeroExtLoad())
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsZeroExtLoad requires IsLoad");
if (!isAtomic()) {
if (isSignExtLoad())
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsSignExtLoad requires IsLoad or IsAtomic");
if (isZeroExtLoad())
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsZeroExtLoad requires IsLoad or IsAtomic");
}
}
if (isStore()) {
@@ -1003,8 +1006,9 @@ std::string TreePredicateFn::getPredCode() const {
if (isAtomic()) {
if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() &&
getAddressSpaces() == nullptr &&
!isAtomicOrderingAcquire() && !isAtomicOrderingRelease() &&
!isAtomicOrderingAcquireRelease() &&
// FIXME: Should atomic loads be IsLoad, IsAtomic, or both?
!isZeroExtLoad() && !isSignExtLoad() && !isAtomicOrderingAcquire() &&
!isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() &&
!isAtomicOrderingSequentiallyConsistent() &&
!isAtomicOrderingAcquireOrStronger() &&
!isAtomicOrderingReleaseOrStronger() &&
@@ -1105,6 +1109,10 @@ std::string TreePredicateFn::getPredCode() const {
Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getMergedOrdering())) "
"return false;\n";
// TODO: Handle atomic sextload/zextload normally when ATOMIC_LOAD is removed.
if (isAtomic() && (isZeroExtLoad() || isSignExtLoad()))
Code += "return false;\n";
if (isLoad() || isStore()) {
StringRef SDNodeName = isLoad() ? "LoadSDNode" : "StoreSDNode";

View File

@@ -3785,10 +3785,12 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
if (!Equiv.isValueUnset("IfSignExtend") &&
(Predicate.isLoad() || Predicate.isAtomic()) &&
Predicate.isSignExtLoad())
return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() &&
if (!Equiv.isValueUnset("IfZeroExtend") &&
(Predicate.isLoad() || Predicate.isAtomic()) &&
Predicate.isZeroExtLoad())
return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
}