[AArch64] Add support to spill/fill D tuples such as DPair/DTriple/DQuad. There are no test cases for the D tuples, as equivalent test cases would be too large; since the spill/fill of a D tuple mirrors that of a Q tuple, the existing Q-tuple tests cover its correctness.
llvm-svn: 198684
This commit is contained in:
parent e42fd0d34d
commit 7d11d99d20
@@ -477,12 +477,18 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
-  } else { // The spill of D tuples is implemented by Q tuples
-    if (RC == &AArch64::QPairRegClass)
+  } else { // For a super register class with more than one sub-register
+    if (AArch64::DPairRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x2_8B;
+    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x3_8B;
+    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x4_8B;
+    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
       StoreOp = AArch64::ST1x2_16B;
-    else if (RC == &AArch64::QTripleRegClass)
+    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
       StoreOp = AArch64::ST1x3_16B;
-    else if (RC == &AArch64::QQuadRegClass)
+    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
       StoreOp = AArch64::ST1x4_16B;
     else
       llvm_unreachable("Unknown reg class");
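Read on its own, the new selection logic is a table from tuple register class to an ST1 store opcode; loadRegFromStackSlot below mirrors it with the LD1 counterparts. A minimal standalone sketch of that mapping (the helper function is illustrative only; the class and opcode names are the ones used in the patch):

// Illustrative sketch, not part of the commit: mirrors the opcode choice
// made in storeRegToStackSlot. hasSubClassEq(RC) replaces pointer equality
// so that sub-classes such as QPair_with_qsub_0_in_FPR128Lo (exercised by
// the tests below) are matched as well.
static unsigned selectTupleStoreOpcode(const TargetRegisterClass *RC) {
  if (AArch64::DPairRegClass.hasSubClassEq(RC))
    return AArch64::ST1x2_8B;   // 2 x 64-bit D registers
  if (AArch64::DTripleRegClass.hasSubClassEq(RC))
    return AArch64::ST1x3_8B;   // 3 x 64-bit D registers
  if (AArch64::DQuadRegClass.hasSubClassEq(RC))
    return AArch64::ST1x4_8B;   // 4 x 64-bit D registers
  if (AArch64::QPairRegClass.hasSubClassEq(RC))
    return AArch64::ST1x2_16B;  // 2 x 128-bit Q registers
  if (AArch64::QTripleRegClass.hasSubClassEq(RC))
    return AArch64::ST1x3_16B;  // 3 x 128-bit Q registers
  if (AArch64::QQuadRegClass.hasSubClassEq(RC))
    return AArch64::ST1x4_16B;  // 4 x 128-bit Q registers
  llvm_unreachable("Unknown reg class");
}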
@@ -537,12 +543,18 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
-  } else { // The spill of D tuples is implemented by Q tuples
-    if (RC == &AArch64::QPairRegClass)
+  } else { // For a super register class with more than one sub-register
+    if (AArch64::DPairRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x2_8B;
+    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x3_8B;
+    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x4_8B;
+    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
       LoadOp = AArch64::LD1x2_16B;
-    else if (RC == &AArch64::QTripleRegClass)
+    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
       LoadOp = AArch64::LD1x3_16B;
-    else if (RC == &AArch64::QQuadRegClass)
+    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
       LoadOp = AArch64::LD1x4_16B;
     else
       llvm_unreachable("Unknown reg class");
@@ -649,6 +661,17 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
     MinOffset = -0x40 * AccessScale;
     MaxOffset = 0x3f * AccessScale;
     return;
+  case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
+    AccessScale = 16;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
+    AccessScale = 24;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
   case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
     AccessScale = 32;
     MinOffset = 0;
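The AccessScale constants follow directly from the transfer width: each LD1xN/ST1xN moves N consecutive registers, so the scale is N times the per-register size in bytes (8 for D, 16 for Q). A quick compile-time sanity check of the values above (illustrative, not from the patch):

// D registers are 8 bytes, Q registers 16; AccessScale = N * bytes-per-reg.
static_assert(2 * 8 == 16, "AccessScale for LD1x2_8B / ST1x2_8B");
static_assert(3 * 8 == 24, "AccessScale for LD1x3_8B / ST1x3_8B");
static_assert(4 * 8 == 32 && 2 * 16 == 32,
              "LD1x4_8B / ST1x4_8B share AccessScale 32 with LD1x2_16B / ST1x2_16B");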
@@ -77,7 +77,10 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 }
 
 static bool hasFrameOffset(int opcode) {
-  return opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
+  return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B &&
+         opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B &&
+         opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B &&
+         opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
          opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
          opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
 }
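The new _8B opcodes join the _16B ones in this exclusion list because these ST1/LD1 forms address memory through a bare base register with no immediate offset field. Presumably frame-index elimination consults the predicate along these lines (a hedged sketch; resolveFrameAccess and the helpers it calls are hypothetical, only hasFrameOffset is from the patch):

// Hypothetical caller, assuming the usual frame-index elimination pattern.
void resolveFrameAccess(MachineInstr &MI, unsigned ScratchReg, int Offset) {
  if (hasFrameOffset(MI.getOpcode())) {
    // The opcode has an immediate offset operand: fold the offset into it.
    foldOffsetIntoInstruction(MI, Offset);      // hypothetical helper
  } else {
    // LD1xN/ST1xN take a bare base register: materialize base + Offset
    // into a scratch register and address through that instead.
    emitAddImmediate(MI, ScratchReg, Offset);   // hypothetical helper
    replaceBaseRegister(MI, ScratchReg);        // hypothetical helper
  }
}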
@@ -30,7 +30,6 @@ def dsub_0 : SubRegIndex<64>;
 def dsub_1 : SubRegIndex<64, 64>;
 def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
 def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
-def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
 }
 
 // Registers are identified with 5-bit ID numbers.
@@ -206,7 +205,7 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
 //===----------------------------------------------------------------------===//
 // Consecutive vector registers
 //===----------------------------------------------------------------------===//
-// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31
+// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0
 def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
                               [(rotl FPR64, 0), (rotl FPR64, 1)]>;
 
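The comment fix tracks what (rotl FPR64, 1) actually generates: the second element of each pair comes from the D-register list rotated by one, so the pairing wraps around and the final pair is D31_D0 rather than stopping at D30_D31. A runnable illustration of the expansion (plain C++, standing in for what TableGen does internally):

#include <cstdio>

// Emulates def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
//                                        [(rotl FPR64, 0), (rotl FPR64, 1)]>;
// pair i combines element i of the list rotated by 0 with element i of the
// list rotated by 1, so the sequence is D0_D1, D1_D2, ..., D30_D31, D31_D0.
int main() {
  for (unsigned i = 0; i != 32; ++i)
    std::printf("D%u_D%u\n", i, (i + 1) % 32);
  return 0;
}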
@@ -132,3 +132,44 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*,
 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
 
+declare void @foo()
+
+; FIXME: We should not generate ld/st for such register spill/fill, because the
+; test case seems very simple and the register pressure is not high. If the
+; spill/fill algorithm is optimized, this test case may not be triggered. And
+; then we can delete it.
+; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo
+define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+  tail call void @foo()
+  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+  %1 = bitcast <2 x i64> %sv to <8 x i16>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %3 = mul <8 x i16> %2, %2
+  ret <8 x i16> %3
+}
+
+; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo
+define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+  tail call void @foo()
+  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+  %1 = bitcast <2 x i64> %sv to <8 x i16>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %3 = mul <8 x i16> %2, %2
+  ret <8 x i16> %3
+}
+
+; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo
+define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+  tail call void @foo()
+  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+  %1 = bitcast <2 x i64> %sv to <8 x i16>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %3 = mul <8 x i16> %2, %2
+  ret <8 x i16> %3
+}
+
+declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)