GlobalISel: Lower non-byte loads and stores

Previously we didn't preserve the memory type and had to blindly
interpret a number of bytes. Now that non-byte memory accesses are
representable, we can handle these correctly.

Ported from the DAG version (minus some weird special case i1 legality
checking, which I don't fully understand and which we don't have a way to
query for).

For now this is effectively NFC and the new test checks are placeholders.
The legality queries still operate on byte-rounded memory sizes, so the
legalizer never actually reaches this lowering for non-byte accesses. That
keeps this change self-contained instead of merging it with the larger
patch to switch to LLT memory queries.
Matt Arsenault 2021-06-08 17:11:12 -04:00
parent 748e0b07dc
commit a601b308d9
3 changed files with 268 additions and 7 deletions
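
As an aside (this illustration is not part of the patch's diff): the point of the first paragraph is directly visible in the MMO syntax. Before LLT-typed memory operands, the MMO carried only a byte count, so a 1-bit truncating store was indistinguishable from an ordinary byte store; with memory types, the sub-byte width survives all the way to the legalizer. A rough MIR sketch, assuming the older byte-count spelling:

; old-style MMO: only a size in bytes is recorded
G_STORE %val(s32), %ptr(p1) :: (store 1, addrspace 1)

; LLT-typed MMO: the 1-bit width is preserved
G_STORE %val(s32), %ptr(p1) :: (store (s1), addrspace 1)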


@@ -2762,7 +2762,51 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
   Register DstReg = MI.getOperand(0).getReg();
   Register PtrReg = MI.getOperand(1).getReg();
   LLT DstTy = MRI.getType(DstReg);
-  auto &MMO = **MI.memoperands_begin();
+  MachineMemOperand &MMO = **MI.memoperands_begin();
+  LLT MemTy = MMO.getMemoryType();
+  MachineFunction &MF = MIRBuilder.getMF();
+  if (MemTy.isVector())
+    return UnableToLegalize;
+  unsigned MemSizeInBits = MemTy.getSizeInBits();
+  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
+  if (MemSizeInBits != MemStoreSizeInBits) {
+    // Promote to a byte-sized load if not loading an integral number of
+    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
+    MachineMemOperand *NewMMO =
+        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
+    Register LoadReg = DstReg;
+    LLT LoadTy = DstTy;
+    // If this wasn't already an extending load, we need to widen the result
+    // register to avoid creating a load with a narrower result than the source.
+    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
+      LoadTy = WideMemTy;
+      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
+    }
+    if (MI.getOpcode() == TargetOpcode::G_SEXTLOAD) {
+      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
+    } else if (MI.getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+               WideMemTy == DstTy) {
+      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+      // The extra bits are guaranteed to be zero, since we stored them that
+      // way. A zext load from Wide thus automatically gives zext from MemVT.
+      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
+    } else {
+      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
+    }
+    if (DstTy != LoadTy)
+      MIRBuilder.buildTrunc(DstReg, LoadReg);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   if (DstTy.getSizeInBits() != MMO.getSizeInBits())
     return UnableToLegalize;
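
To make the new load path concrete: once the legality queries can actually route a sub-byte extending load here, a G_SEXTLOAD like the s7 cases in the tests below would be expected to lower roughly as follows (a hand-written sketch, not output copied from this patch's tests, which still show the unlowered form):

%0:_(p1) = COPY $vgpr0_vgpr1
%2:_(s32) = G_LOAD %0(p1) :: (load (s8), addrspace 1)
%1:_(s32) = G_SEXT_INREG %2, 7

The G_ZEXTLOAD case is analogous but emits G_ASSERT_ZEXT instead of G_SEXT_INREG, and a non-extending load whose result is narrower than the widened byte size is loaded into a wider temporary and truncated back.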
@@ -2831,20 +2875,46 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
   Register SrcReg = MI.getOperand(0).getReg();
   Register PtrReg = MI.getOperand(1).getReg();
   LLT SrcTy = MRI.getType(SrcReg);
+  MachineFunction &MF = MIRBuilder.getMF();
   MachineMemOperand &MMO = **MI.memoperands_begin();
-  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
-    return UnableToLegalize;
+  LLT MemTy = MMO.getMemoryType();
   if (SrcTy.isVector())
     return UnableToLegalize;
-  if (isPowerOf2_32(SrcTy.getSizeInBits()))
+  unsigned StoreWidth = MemTy.getSizeInBits();
+  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
+  if (StoreWidth != StoreSizeInBits) {
+    // Promote to a byte-sized store with upper bits zero if not
+    // storing an integral number of bytes. For example, promote
+    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+    LLT WideTy = LLT::scalar(StoreSizeInBits);
+    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
+      // Avoid creating a store with a narrower source than result.
+      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+      SrcTy = WideTy;
+    }
+    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
+    MachineMemOperand *NewMMO =
+        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
+    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  if (isPowerOf2_32(MemTy.getSizeInBits()))
     return UnableToLegalize; // Don't know what we're being asked to do.
   // Extend to the next pow-2.
-  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+  const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
   auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
   // Obtain the smaller value by shifting away the larger value.
-  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+  uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
   uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
   auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
   auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
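
For the TRUNCSTORE:i1 example in the comment above, the promoted sequence would look roughly like this (a sketch; the G_CONSTANT/G_AND pair is what buildZExtInReg expands to):

; assumed input: G_STORE %1(s32), %0(p1) :: (store (s1), addrspace 1)
%2:_(s32) = G_CONSTANT i32 1
%3:_(s32) = G_AND %1, %2
G_STORE %3(s32), %0(p1) :: (store (s8), addrspace 1)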
@@ -2857,7 +2927,6 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
   auto SmallPtr =
       MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
-  MachineFunction &MF = MIRBuilder.getMF();
   MachineMemOperand *LargeMMO =
       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
   MachineMemOperand *SmallMMO =
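
The non-power-of-2 split path above (now keyed off MemTy rather than SrcTy) keeps its existing shape: store a power-of-2-floor-sized low piece, shift the source right, and store the remainder at an offset pointer. A sketch for a hypothetical s48 truncating store in addrspace 1 (illustrative only, not from this patch's tests):

; assumed input: G_STORE %1(s48), %0(p1) :: (store (s48), addrspace 1)
%2:_(s64) = G_ANYEXT %1(s48)
%3:_(s64) = G_CONSTANT i64 32
%4:_(s64) = G_LSHR %2, %3
%5:_(s64) = G_CONSTANT i64 4
%6:_(p1) = G_PTR_ADD %0, %5(s64)
G_STORE %2(s64), %0(p1) :: (store (s32), addrspace 1)
G_STORE %4(s64), %6(p1) :: (store (s16) into unknown-address + 4, addrspace 1)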


@@ -6,6 +6,7 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16)
@@ -14,6 +15,100 @@
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_sextload_global_s128_8)
# ERR-NOT: remark
---
name: test_sextload_global_i32_i1
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_sextload_global_i32_i1
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i7
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_sextload_global_i32_i7
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i7
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i24
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_sextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i30
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_sextload_global_i32_i30
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i30
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i31
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_sextload_global_i32_i31
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i31
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i8
body: |
@@ -32,6 +127,7 @@ body: |
%1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_sextload_global_i32_i16
body: |


@@ -6,6 +6,7 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4)
@@ -14,6 +15,101 @@
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_s128_8)
# ERR-NOT: remark
---
name: test_zextload_global_i32_i1
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_zextload_global_i32_i1
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_zextload_global_i32_i7
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_zextload_global_i32_i7
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i7
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_zextload_global_i32_i24
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_zextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_zextload_global_i32_i30
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_zextload_global_i32_i30
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i30
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_zextload_global_i32_i31
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX8-LABEL: name: test_zextload_global_i32_i31
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i31
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_zextload_global_i32_i8
body: |