forked from OSchip/llvm-project
parent
6dd2730024
commit
f731a2df6b
|
@ -1035,6 +1035,11 @@ class NI<dag oops, dag iops, string asm, list<dag> pattern>
|
|||
: NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, "", pattern> {
|
||||
}
|
||||
|
||||
// NLdSt - NEON instruction format class built on NeonI with AddrMode6 and
// IndexModeNone addressing and an empty constraint string.  The top byte of
// the encoding (Inst{31-24}) is fixed to 0b11110100; the remaining bits are
// not set here.  (The name suggests "NEON load/store"; the VLD1 classes
// derive from it.)
class NLdSt<dag oops, dag iops, string asm, list<dag> pattern>
|
||||
: NeonI<oops, iops, AddrMode6, IndexModeNone, asm, "", pattern> {
|
||||
let Inst{31-24} = 0b11110100;
|
||||
}
|
||||
|
||||
class NDataI<dag oops, dag iops, string asm, string cstr, list<dag> pattern>
|
||||
: NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, cstr, pattern> {
|
||||
let Inst{31-25} = 0b1111001;
|
||||
|
|
|
@ -111,6 +111,29 @@ def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
|
|||
[(store (v2f64 QPR:$src), GPR:$addr)]>;
|
||||
|
||||
|
||||
// VLD1 : Vector Load (multiple single elements)
|
||||
// VLD1D - VLD1 form whose result is a single DPR register.
//   OpcodeStr - mnemonic text placed before the operands (e.g. "vld1.8").
//   Ty        - vector value type the intrinsic result is typed as.
//   IntOp     - intrinsic matched by the selection pattern.
// The destination is printed with the "dregsingle" operand modifier and the
// address with the addrmode6 operand.  The pattern only matches calls of
// IntOp whose second operand is the constant 1.
class VLD1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs DPR:$dst), (ins addrmode6:$addr),
|
||||
!strconcat(OpcodeStr, "\t${dst:dregsingle}, $addr"),
|
||||
[(set DPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>;
|
||||
// VLD1Q - VLD1 form whose result is a QPR register.
//   OpcodeStr - mnemonic text placed before the operands (e.g. "vld1.8").
//   Ty        - vector value type the intrinsic result is typed as.
//   IntOp     - intrinsic matched by the selection pattern.
// The destination is printed with the "dregpair" operand modifier and the
// address with the addrmode6 operand.  The pattern only matches calls of
// IntOp whose second operand is the constant 1.
class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs QPR:$dst), (ins addrmode6:$addr),
|
||||
!strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"),
|
||||
[(set QPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>;
|
||||
|
||||
// D-register VLD1 instructions, one per element type.  The integer variants
// match int_arm_neon_vldi; the v2f32 variant (VLD1df) matches
// int_arm_neon_vldf but shares the "vld1.32" mnemonic with VLD1d32.
def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vldi>;
|
||||
def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vldi>;
|
||||
def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vldi>;
|
||||
def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vldf>;
|
||||
def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vldi>;
|
||||
|
||||
// Q-register VLD1 instructions, one per element type.  The integer variants
// match int_arm_neon_vldi; the v4f32 variant (VLD1qf) matches
// int_arm_neon_vldf but shares the "vld1.32" mnemonic with VLD1q32.
def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vldi>;
|
||||
def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vldi>;
|
||||
def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vldi>;
|
||||
def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vldf>;
|
||||
def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vldi>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -298,8 +298,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
|
|||
unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0
|
||||
unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1
|
||||
O << '{'
|
||||
<< TRI->getAsmName(DRegLo) << "-" << TRI->getAsmName(DRegHi)
|
||||
<< TRI->getAsmName(DRegLo) << ',' << TRI->getAsmName(DRegHi)
|
||||
<< '}';
|
||||
} else if (Modifier && strcmp(Modifier, "dregsingle") == 0) {
|
||||
O << '{' << TRI->getAsmName(Reg) << '}';
|
||||
} else {
|
||||
O << TRI->getAsmName(Reg);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
|
||||
; RUN: grep {vld1\\.8} %t | count 2
|
||||
; RUN: grep {vld1\\.16} %t | count 2
|
||||
; RUN: grep {vld1\\.32} %t | count 4
|
||||
; RUN: grep {vld1\\.64} %t | count 2
|
||||
|
||||
; 64-bit vector of i8: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <8 x i8> result.  Counted by the "vld1.8" RUN check.
define <8 x i8> @vld1i8(i8* %A) nounwind {
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vldi.v8i8(i8* %A, i32 1)
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
; 64-bit vector of i16: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <4 x i16> result.  Counted by the "vld1.16" RUN check.
define <4 x i16> @vld1i16(i16* %A) nounwind {
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vldi.v4i16(i16* %A, i32 1)
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
; 64-bit vector of i32: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <2 x i32> result.  Counted by the "vld1.32" RUN check.
define <2 x i32> @vld1i32(i32* %A) nounwind {
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vldi.v2i32(i32* %A, i32 1)
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
; 64-bit vector of float: uses the vldf (floating-point) intrinsic rather than
; vldi, with a second argument of 1, and returns the <2 x float> result.
; Counted by the "vld1.32" RUN check together with the i32 variants.
define <2 x float> @vld1f(float* %A) nounwind {
|
||||
%tmp1 = call <2 x float> @llvm.arm.neon.vldf.v2f32(float* %A, i32 1)
|
||||
ret <2 x float> %tmp1
|
||||
}
|
||||
|
||||
; 64-bit vector of i64: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <1 x i64> result.  Counted by the "vld1.64" RUN check.
define <1 x i64> @vld1i64(i64* %A) nounwind {
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vldi.v1i64(i64* %A, i32 1)
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
; 128-bit vector of i8: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <16 x i8> result.  Counted by the "vld1.8" RUN check.
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vldi.v16i8(i8* %A, i32 1)
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
; 128-bit vector of i16: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <8 x i16> result.  Counted by the "vld1.16" RUN check.
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vldi.v8i16(i16* %A, i32 1)
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
; 128-bit vector of i32: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <4 x i32> result.  Counted by the "vld1.32" RUN check.
define <4 x i32> @vld1Qi32(i32* %A) nounwind {
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vldi.v4i32(i32* %A, i32 1)
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
; 128-bit vector of float: uses the vldf (floating-point) intrinsic rather
; than vldi, with a second argument of 1, and returns the <4 x float> result.
; Counted by the "vld1.32" RUN check together with the i32 variants.
define <4 x float> @vld1Qf(float* %A) nounwind {
|
||||
%tmp1 = call <4 x float> @llvm.arm.neon.vldf.v4f32(float* %A, i32 1)
|
||||
ret <4 x float> %tmp1
|
||||
}
|
||||
|
||||
; 128-bit vector of i64: calls the vldi intrinsic on %A with a second argument
; of 1 and returns the <2 x i64> result.  Counted by the "vld1.64" RUN check.
define <2 x i64> @vld1Qi64(i64* %A) nounwind {
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vldi.v2i64(i64* %A, i32 1)
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
; Declarations of the NEON load intrinsics called by the test functions.
; Each takes an element pointer plus an i32 and returns the vector type named
; in its suffix; vldi covers the integer vectors and vldf the float vectors.
; The first group returns 64-bit vectors, the second group 128-bit vectors.
declare <8 x i8> @llvm.arm.neon.vldi.v8i8(i8*, i32) nounwind readnone
|
||||
declare <4 x i16> @llvm.arm.neon.vldi.v4i16(i16*, i32) nounwind readnone
|
||||
declare <2 x i32> @llvm.arm.neon.vldi.v2i32(i32*, i32) nounwind readnone
|
||||
declare <2 x float> @llvm.arm.neon.vldf.v2f32(float*, i32) nounwind readnone
|
||||
declare <1 x i64> @llvm.arm.neon.vldi.v1i64(i64*, i32) nounwind readnone
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vldi.v16i8(i8*, i32) nounwind readnone
|
||||
declare <8 x i16> @llvm.arm.neon.vldi.v8i16(i16*, i32) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vldi.v4i32(i32*, i32) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vldf.v4f32(float*, i32) nounwind readnone
|
||||
declare <2 x i64> @llvm.arm.neon.vldi.v2i64(i64*, i32) nounwind readnone
|
Loading…
Reference in New Issue