[AArch64][GlobalISel] Legalization and ISel support for load/stores of vectors of pointers.

Loads and stores of values with types like <2 x p0> currently don't get imported
because SelectionDAG has no knowledge of pointer types. To leverage the existing
support for vector load/stores, we can bitcast the value to have s64 element
types instead. We do this as a custom legalization.

This patch also adds support for general loads of <2 x s64>, and relaxes some
type conditions on selecting G_BITCAST.

Differential Revision: https://reviews.llvm.org/D60534

llvm-svn: 358221
This commit is contained in:
Amara Emerson 2019-04-11 20:32:24 +00:00
parent 994023a3f1
commit b956051415
8 changed files with 260 additions and 25 deletions

View File

@ -1658,11 +1658,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_BITCAST:
// Imported SelectionDAG rules can handle every bitcast except those that
// bitcast from a type to the same type. Ideally, these shouldn't occur
// but we might not run an optimizer that deletes them.
if (MRI.getType(I.getOperand(0).getReg()) ==
MRI.getType(I.getOperand(1).getReg()))
// but we might not run an optimizer that deletes them. The other exception
// is bitcasts involving pointer types, as SelectionDAG has no knowledge
// of them.
return selectCopy(I, TII, MRI, TRI, RBI);
return false;
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {

View File

@ -21,6 +21,8 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#define DEBUG_TYPE "aarch64-legalinfo"
using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
@ -208,13 +210,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// Lower anything left over into G_*EXT and G_LOAD
.lower();
auto IsPtrVecPred = [=](const LegalityQuery &Query) {
const LLT &ValTy = Query.Types[0];
if (!ValTy.isVector())
return false;
const LLT EltTy = ValTy.getElementType();
return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
};
getActionDefinitionsBuilder(G_LOAD)
.legalForTypesWithMemDesc({{s8, p0, 8, 8},
{s16, p0, 16, 8},
{s32, p0, 32, 8},
{s64, p0, 64, 8},
{p0, p0, 64, 8},
{v2s32, p0, 64, 8}})
{v2s32, p0, 64, 8},
{v2s64, p0, 128, 8}})
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, 8, 8},
{s32, p0, 16, 8}})
@ -228,7 +239,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
.clampMaxNumElements(0, s32, 2)
.clampMaxNumElements(0, s64, 1);
.clampMaxNumElements(0, s64, 1)
.customIf(IsPtrVecPred);
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemDesc({{s8, p0, 8, 8},
@ -248,7 +260,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
.clampMaxNumElements(0, s32, 2)
.clampMaxNumElements(0, s64, 1);
.clampMaxNumElements(0, s64, 1)
.customIf(IsPtrVecPred);
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
@ -357,7 +370,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// number of bits but it's what the previous code described and fixing
// it breaks tests.
.legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
v2p0});
getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
@ -541,11 +555,53 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
return false;
case TargetOpcode::G_VAARG:
return legalizeVaArg(MI, MRI, MIRBuilder);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
}
llvm_unreachable("expected switch to return");
}
/// Custom legalization for G_LOAD / G_STORE whose value operand is a vector
/// of address-space-0 pointers.
///
/// The SelectionDAG importer cannot handle value types with pointer elements,
/// so the access is re-expressed on an integer vector with the same element
/// count and element width, with a G_BITCAST connecting it to the original
/// pointer-vector register. This lets the existing s64 patterns fire for p0.
///
/// \param MI          The G_LOAD or G_STORE to rewrite; erased on success.
/// \param MRI         Used to query the value type and to create the
///                    temporary register for the load case.
/// \param MIRBuilder  Builder used to emit the replacement instructions; its
///                    insertion point is set to \p MI.
/// \param Observer    Not used by this routine.
/// \returns true if \p MI was replaced and erased, false (leaving \p MI
///          untouched) if its value type is not a vector of addrspace-0
///          pointers.
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);

  // Operand 0 is the stored value (G_STORE) or the loaded result (G_LOAD).
  unsigned ValueReg = MI.getOperand(0).getReg();
  const LLT OrigTy = MRI.getType(ValueReg);

  // Only vectors of addrspace-0 pointers are handled here; anything else is
  // reported as a failed custom legalization.
  const bool IsP0Vector = OrigTy.isVector() &&
                          OrigTy.getElementType().isPointer() &&
                          OrigTy.getElementType().getAddressSpace() == 0;
  if (!IsP0Vector) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  // A custom-legalized instruction that survives must already be fully legal.
  // To keep the door open for further legalization, a brand new instruction
  // is emitted in place of MI and MI itself is erased afterwards.
  MIRBuilder.setInstr(MI);

  // Integer vector type with the same lane count and lane width as OrigTy.
  unsigned EltBits = OrigTy.getElementType().getSizeInBits();
  const LLT IntVecTy = LLT::vector(OrigTy.getNumElements(), EltBits);
  auto &MemOp = **MI.memoperands_begin();

  if (MI.getOpcode() != TargetOpcode::G_STORE) {
    // Load: load the integer vector, then bitcast it into the original
    // pointer-vector destination register.
    unsigned LoadedReg = MRI.createGenericVirtualRegister(IntVecTy);
    auto IntLoad =
        MIRBuilder.buildLoad(LoadedReg, MI.getOperand(1).getReg(), MemOp);
    MIRBuilder.buildBitcast({ValueReg}, {IntLoad});
  } else {
    // Store: bitcast the pointer vector to the integer vector and store that.
    auto AsInts = MIRBuilder.buildBitcast({IntVecTy}, {ValueReg});
    MIRBuilder.buildStore(AsInts.getReg(0), MI.getOperand(1).getReg(), MemOp);
  }

  MI.eraseFromParent();
  return true;
}
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {

View File

@ -34,6 +34,9 @@ public:
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
};
} // End llvm namespace.
#endif

View File

@ -158,7 +158,7 @@ end:
br label %block
}
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %2:_(<2 x p0>), %1:_(p0) :: (store 16 into `<2 x i16*>* undef`) (in function: vector_of_pointers_insertelement)
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %3:_(p0), %5:_(s32) (in function: vector_of_pointers_insertelement)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
define void @vector_of_pointers_insertelement() {

View File

@ -38,12 +38,8 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY1]](p0) :: (store 16)
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16)
; CHECK: G_STORE [[LOAD]](<2 x s64>), [[COPY1]](p0) :: (store 16)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16)

View File

@ -0,0 +1,86 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -march=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
define void @store_v2p0(<2 x i8*> %v, <2 x i8*>* %ptr) {
store <2 x i8*> %v, <2 x i8*>* %ptr
ret void
}
define <2 x i8*> @load_v2p0(<2 x i8*>* %ptr) {
%v = load <2 x i8*>, <2 x i8*>* %ptr
ret <2 x i8*> %v
}
define void @load_v2p1(<2 x i8*>* %ptr) { ret void }
...
---
name: store_v2p0
alignment: 2
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $q0, $x0
; CHECK-LABEL: name: store_v2p0
; CHECK: liveins: $q0, $x0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<2 x p0>)
; CHECK: G_STORE [[BITCAST]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.ptr)
; CHECK: RET_ReallyLR
%0:_(<2 x p0>) = COPY $q0
%1:_(p0) = COPY $x0
G_STORE %0(<2 x p0>), %1(p0) :: (store 16 into %ir.ptr)
RET_ReallyLR
...
---
name: load_v2p0
alignment: 2
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $x0
; CHECK-LABEL: name: load_v2p0
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>)
; CHECK: $q0 = COPY [[BITCAST]](<2 x p0>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(p0) = COPY $x0
%1:_(<2 x p0>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
$q0 = COPY %1(<2 x p0>)
RET_ReallyLR implicit $q0
...
---
name: load_v2p1
alignment: 2
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $x0
; Check that we don't try to bitcast vectors of pointers w/ non-zero addrspaces.
; CHECK-LABEL: name: load_v2p1
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
; CHECK: $q0 = COPY [[LOAD]](<2 x p1>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(p0) = COPY $x0
%1:_(<2 x p1>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
$q0 = COPY %1(<2 x p1>)
RET_ReallyLR implicit $q0
...

View File

@ -0,0 +1,73 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -O0 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
define void @store_v2p0(<2 x i8*> %v, <2 x i8*>* %ptr) {
store <2 x i8*> %v, <2 x i8*>* %ptr
ret void
}
define <2 x i8*> @load_v2p0(<2 x i8*>* %ptr) {
%v = load <2 x i8*>, <2 x i8*>* %ptr
ret <2 x i8*> %v
}
...
---
name: store_v2p0
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
- { id: 2, class: fpr }
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $q0, $x0
; CHECK-LABEL: name: store_v2p0
; CHECK: liveins: $q0, $x0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr)
; CHECK: RET_ReallyLR
%0:fpr(<2 x p0>) = COPY $q0
%1:gpr(p0) = COPY $x0
%2:fpr(<2 x s64>) = G_BITCAST %0(<2 x p0>)
G_STORE %2(<2 x s64>), %1(p0) :: (store 16 into %ir.ptr)
RET_ReallyLR
...
---
name: load_v2p0
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
- { id: 2, class: fpr }
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $x0
; CHECK-LABEL: name: load_v2p0
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr)
; CHECK: $q0 = COPY [[LDRQui]]
; CHECK: RET_ReallyLR implicit $q0
%0:gpr(p0) = COPY $x0
%2:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
%1:fpr(<2 x p0>) = G_BITCAST %2(<2 x s64>)
$q0 = COPY %1(<2 x p0>)
RET_ReallyLR implicit $q0
...

View File

@ -32,6 +32,7 @@
define void @load_gep_32_s8_fpr(i8* %addr) { ret void }
define void @load_v2s32(i64 *%addr) { ret void }
define void @load_v2s64(i64 *%addr) { ret void }
...
---
@ -112,8 +113,8 @@ body: |
; CHECK-LABEL: name: load_s16_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr)
; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK: $w0 = COPY [[T0]]
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK: $w0 = COPY [[COPY1]]
%0(p0) = COPY $x0
%1(s16) = G_LOAD %0 :: (load 2 from %ir.addr)
%2:gpr(s32) = G_ANYEXT %1
@ -129,7 +130,7 @@ body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: load_s8_gpr
; CHECK-LABEL: name: load_s8_gpr_anyext
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr)
; CHECK: $w0 = COPY [[LDRBBui]]
@ -154,8 +155,8 @@ body: |
; CHECK-LABEL: name: load_s8_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr)
; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK: $w0 = COPY [[T0]]
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK: $w0 = COPY [[COPY1]]
%0(p0) = COPY $x0
%1(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
%2:gpr(s32) = G_ANYEXT %1
@ -256,8 +257,8 @@ body: |
; CHECK-LABEL: name: load_gep_64_s16_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load 2 from %ir.addr)
; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK: $w0 = COPY [[T0]]
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK: $w0 = COPY [[COPY1]]
%0(p0) = COPY $x0
%1(s64) = G_CONSTANT i64 64
%2(p0) = G_GEP %0, %1
@ -284,8 +285,8 @@ body: |
; CHECK-LABEL: name: load_gep_1_s8_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load 1 from %ir.addr)
; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK: $w0 = COPY [[T0]]
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK: $w0 = COPY [[COPY1]]
%0(p0) = COPY $x0
%1(s64) = G_CONSTANT i64 1
%2(p0) = G_GEP %0, %1
@ -506,3 +507,24 @@ body: |
%1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr)
$d0 = COPY %1(<2 x s32>)
...
---
name: load_v2s64
legalized: true
regBankSelected: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: load_v2s64
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.addr)
; CHECK: $q0 = COPY [[LDRQui]]
%0(p0) = COPY $x0
%1(<2 x s64>) = G_LOAD %0 :: (load 16 from %ir.addr)
$q0 = COPY %1(<2 x s64>)
...