From 1ee6ce9bad4d7d61e5c6d37ebd5bfa89b91096c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 9 Apr 2022 14:06:04 -0400 Subject: [PATCH] GlobalISel: Allow forming atomic/volatile G_ZEXTLOAD SelectionDAG has a target hook, getExtendForAtomicOps, which it uses in the computeKnownBits implementation for ATOMIC_LOAD. This is pretty ugly (as is having a separate load opcode for atomics), so instead allow making use of atomic zextload. Enable this for AArch64 since the DAG path defaults in to the zext behavior. The tablegen changes are pretty ugly, but partially helps migrate SelectionDAG from using ISD::ATOMIC_LOAD to regular ISD::LOAD with atomic memory operands. For now the DAG emitter will emit matchers for patterns which the DAG will not produce. I'm still a bit confused by the intent of the isLoad/isStore/isAtomic bits. The DAG implementation rejects trying to use any of these in combination. For now I've opted to make the isLoad checks also check isAtomic, although I think having isLoad and isAtomic set on these makes most sense. --- llvm/docs/GlobalISel/GenericOpcode.rst | 4 + .../Target/GlobalISel/SelectionDAGCompat.td | 2 + .../include/llvm/Target/TargetSelectionDAG.td | 50 ++++++ .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 22 ++- .../lib/Target/AArch64/AArch64InstrAtomics.td | 38 ++--- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 12 +- .../GlobalISel/legalizer-info-validation.mir | 3 +- .../prelegalizer-combiner-load-and-mask.mir | 74 +++++++- .../AArch64/GlobalISel/select-zextload.mir | 160 ++++++++++++++++++ ...stlegalizer-combiner-zextload-from-and.mir | 12 +- llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 24 ++- llvm/utils/TableGen/GlobalISelEmitter.cpp | 6 +- 12 files changed, 353 insertions(+), 54 deletions(-) diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index fdf0b7b4732a..3c07a85b9496 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -684,6 +684,10 @@ Only G_LOAD is valid if the result is a vector type. If the result is larger than the memory size, the high elements are undefined (i.e. this is not a per-element, vector anyextload) +Unlike in SelectionDAG, atomic loads are expressed with the same +opcodes as regular loads. G_LOAD, G_SEXTLOAD and G_ZEXTLOAD may all +have atomic memory operands. + G_INDEXED_LOAD ^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index b1a07e873878..ef4fc85b245d 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -184,6 +184,8 @@ def : GINodeEquiv { let CheckMMOIsNonAtomic = true; } def : GINodeEquiv { let CheckMMOIsNonAtomic = false; let CheckMMOIsAtomic = true; + let IfSignExtend = G_SEXTLOAD; + let IfZeroExtend = G_ZEXTLOAD; } // Operands are swapped for atomic_store vs. regular store diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 0f796a30d571..171fdb1b98e0 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1627,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op; defm atomic_store : binary_atomic_op; defm atomic_cmp_swap : ternary_atomic_op; +/// Atomic load which zeroes the excess high bits. 
+def atomic_load_zext : + PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let IsZeroExtLoad = true; +} + +/// Atomic load which sign extends the excess high bits. +def atomic_load_sext : + PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let IsSignExtLoad = true; +} + def atomic_load_8 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i8; } + def atomic_load_16 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i16; } + def atomic_load_32 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { @@ -1652,6 +1668,40 @@ def atomic_load_64 : let MemoryVT = i64; } +def atomic_load_zext_8 : + PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i8; +} + +def atomic_load_zext_16 : + PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i16; +} + +def atomic_load_sext_8 : + PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i8; +} + +def atomic_load_sext_16 : + PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i16; +} + +// Atomic load which zeroes or anyextends the high bits. +def atomic_load_az_8 : PatFrags<(ops node:$op), + [(atomic_load_8 node:$op), + (atomic_load_zext_8 node:$op)]>; + +// Atomic load which zeroes or anyextends the high bits. +def atomic_load_az_16 : PatFrags<(ops node:$op), + [(atomic_load_16 node:$op), + (atomic_load_zext_16 node:$op)]>; + def nonext_masked_gather : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 2c94f87804ac..8a0579d25b16 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -698,13 +698,13 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, Register SrcReg = MI.getOperand(1).getReg(); GAnyLoad *LoadMI = getOpcodeDef(SrcReg, MRI); - if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) || - !LoadMI->isSimple()) + if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg())) return false; Register LoadReg = LoadMI->getDstReg(); - LLT LoadTy = MRI.getType(LoadReg); + LLT RegTy = MRI.getType(LoadReg); Register PtrReg = LoadMI->getPointerReg(); + unsigned RegSize = RegTy.getSizeInBits(); uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); unsigned MaskSizeBits = MaskVal.countTrailingOnes(); @@ -715,7 +715,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, // If the mask covers the whole destination register, there's nothing to // extend - if (MaskSizeBits >= LoadTy.getSizeInBits()) + if (MaskSizeBits >= RegSize) return false; // Most targets cannot deal with loads of size < 8 and need to re-legalize to @@ -725,17 +725,25 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, const MachineMemOperand &MMO = LoadMI->getMMO(); LegalityQuery::MemDesc MemDesc(MMO); - MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); + + // Don't modify the memory access size if this is atomic/volatile, but we can + // 
still adjust the opcode to indicate the high bit behavior. + if (LoadMI->isSimple()) + MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); + else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize) + return false; + if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}})) + {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}})) return false; MatchInfo = [=](MachineIRBuilder &B) { B.setInstrAndDebugLoc(*LoadMI); auto &MF = B.getMF(); auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy); B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO); + LoadMI->eraseFromParent(); }; return true; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index c477a44b13b2..6839e73796a6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>; // An atomic load operation that does not need either acquire or release // semantics. -class relaxed_load +class relaxed_load : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingAcquireOrStronger = 0; } // A atomic load operation that actually needs acquire semantics. -class acquiring_load +class acquiring_load : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingAcquire = 1; } // An atomic load operation that needs sequential consistency. -class seq_cst_load +class seq_cst_load : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingSequentiallyConsistent = 1; @@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in { } // 8-bit loads -def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)), (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; -def : Pat<(relaxed_load (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend8:$offset)), +def : Pat<(relaxed_load (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$offset)), (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; -def : Pat<(relaxed_load (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)), +def : Pat<(relaxed_load (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(relaxed_load +def : Pat<(relaxed_load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; // 16-bit loads -def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)), (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; -def : Pat<(relaxed_load (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)), +def : Pat<(relaxed_load (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), 
(LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; -def : Pat<(relaxed_load (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)), +def : Pat<(relaxed_load (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat<(relaxed_load +def : Pat<(relaxed_load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), (LDURHHi GPR64sp:$Rn, simm9:$offset)>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 74ec9373ce9e..10f85103e468 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32) .maxScalarIf(typeInSet(1, {s128}), 0, s64); - getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) + + for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { + auto &Actions = getActionDefinitionsBuilder(Op); + + if (Op == G_SEXTLOAD) + Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)); + + // Atomics have zero extending behavior. + Actions .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8}, @@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .unsupportedIfMemSizeNotPow2() // Lower anything left over into G_*EXT and G_LOAD .lower(); + } auto IsPtrVecPred = [=](const LegalityQuery &Query) { const LLT &ValTy = Query.Types[0]; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 8d5fde6927ed..db5a4c0a684c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -161,8 +161,7 @@ # DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: G_ZEXTLOAD (opcode 80): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir index e7fef25465af..a284483765ad 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir @@ -88,12 +88,12 @@ body: | ... --- -name: test_load_s32_atomic +name: test_load_mask_s8_s32_atomic tracksRegLiveness: true body: | bb.0: liveins: $x0 - ; CHECK-LABEL: name: test_load_s32_atomic + ; CHECK-LABEL: name: test_load_mask_s8_s32_atomic ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 @@ -108,6 +108,49 @@ body: | $w0 = COPY %3 ... +# The mask is equal to the memory size. 
+--- +name: test_load_mask_s16_s16_atomic +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_mask_s16_s16_atomic + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load seq_cst (s16)) + ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 65535 + %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... + +# The mask is smaller than the memory size which must be preserved, so +# there's little point to folding. +--- +name: test_load_mask_s8_s16_atomic +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_mask_s8_s16_atomic + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load seq_cst (s16)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 255 + %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... + --- name: test_load_mask_size_equals_dst_size tracksRegLiveness: true @@ -272,13 +315,32 @@ body: | ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8)) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CHECK-NEXT: $w0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (volatile load (s8)) + ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 255 %2:_(s32) = G_LOAD %0 :: (volatile load (s8)) %3:_(s32) = G_AND %2, %1 $w0 = COPY %3 ... + +--- +name: test_volatile_mask_smaller_mem +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_volatile_mask_smaller_mem + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s16)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 255 + %2:_(s32) = G_LOAD %0 :: (volatile load (s16)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir index 4e746ed08e7f..5b36d7ae5c91 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir @@ -133,3 +133,163 @@ body: | RET_ReallyLR implicit $w0 ... 
+ +--- +name: zextload_s32_from_s8_atomic_unordered +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_unordered + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load unordered (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDRBBui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s8_atomic_monotonic +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_monotonic + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load monotonic (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDRBBui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s8_atomic_acquire +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_acquire + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDARB]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s8_atomic_seq_cst +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_seq_cst + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load seq_cst (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDARB]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_unordered +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_unordered + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load unordered (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDRHHui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_monotonic +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_monotonic + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load monotonic (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDRHHui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: zextload_s32_from_s16_atomic_acquire +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_acquire + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load acquire (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDARH]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_seq_cst +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_seq_cst + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load seq_cst (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDARH]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir index 0b55aec8bef9..73e06de1923d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir @@ -141,10 +141,8 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1) %k:_(s32) = G_CONSTANT i32 255 @@ -183,10 +181,8 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1) %k:_(s32) = G_CONSTANT i32 65535 diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 0f37875a3196..c15728ac7d23 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -977,12 +977,15 @@ std::string TreePredicateFn::getPredCode() const { if (isAnyExtLoad()) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsAnyExtLoad requires IsLoad"); - if (isSignExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsSignExtLoad requires IsLoad"); - if (isZeroExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsZeroExtLoad requires IsLoad"); + + if (!isAtomic()) { + if (isSignExtLoad()) + 
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+                      "IsSignExtLoad requires IsLoad or IsAtomic");
+    if (isZeroExtLoad())
+      PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+                      "IsZeroExtLoad requires IsLoad or IsAtomic");
+  }
   }
 
   if (isStore()) {
@@ -1003,8 +1006,9 @@ std::string TreePredicateFn::getPredCode() const {
   if (isAtomic()) {
     if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() &&
         getAddressSpaces() == nullptr &&
-        !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() &&
-        !isAtomicOrderingAcquireRelease() &&
+        // FIXME: Should atomic loads be IsLoad, IsAtomic, or both?
+        !isZeroExtLoad() && !isSignExtLoad() && !isAtomicOrderingAcquire() &&
+        !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() &&
         !isAtomicOrderingSequentiallyConsistent() &&
         !isAtomicOrderingAcquireOrStronger() &&
         !isAtomicOrderingReleaseOrStronger() &&
@@ -1105,6 +1109,10 @@ std::string TreePredicateFn::getPredCode() const {
   Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getMergedOrdering())) "
           "return false;\n";
 
+  // TODO: Handle atomic sextload/zextload normally when ATOMIC_LOAD is removed.
+  if (isAtomic() && (isZeroExtLoad() || isSignExtLoad()))
+    Code += "return false;\n";
+
   if (isLoad() || isStore()) {
     StringRef SDNodeName = isLoad() ? "LoadSDNode" : "StoreSDNode";
 
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index f92e19736295..123db21af8d4 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3785,10 +3785,12 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
   for (const TreePredicateCall &Call : N->getPredicateCalls()) {
     const TreePredicateFn &Predicate = Call.Fn;
 
-    if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
+    if (!Equiv.isValueUnset("IfSignExtend") &&
+        (Predicate.isLoad() || Predicate.isAtomic()) &&
         Predicate.isSignExtLoad())
       return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
-    if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() &&
+    if (!Equiv.isValueUnset("IfZeroExtend") &&
+        (Predicate.isLoad() || Predicate.isAtomic()) &&
         Predicate.isZeroExtLoad())
       return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
   }
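
Illustrative sketch (not part of the patch): the atomic_load_zext_*/atomic_load_az_* fragments added to TargetSelectionDAG.td above let a backend match zero- or any-extending atomic loads directly in TableGen. The pattern below is hypothetical: MYLDRBZW is an invented instruction standing in for a target byte load that zero-extends into a 32-bit register, and relaxed_load/GPR64sp are borrowed from the AArch64 patterns in this patch purely for illustration.

// Select a relaxed (non-acquire) 8-bit atomic load whose excess high bits
// are either zeroed or undefined; relaxed_load restricts the ordering,
// atomic_load_az_8 accepts both the zext and anyext forms.
def : Pat<(relaxed_load<atomic_load_az_8> GPR64sp:$ptr),
          (MYLDRBZW GPR64sp:$ptr)>;

Because the atomic-load GINodeEquiv above now sets IfZeroExtend = G_ZEXTLOAD (and IfSignExtend = G_SEXTLOAD), the GlobalISel importer can map such a pattern onto G_ZEXTLOAD with an atomic memory operand instead of requiring a separate atomic load opcode.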