[ARM] Widening loads and narrowing stores

MVE has instructions that widen as they load and narrow as they store. This
marks the corresponding extending loads and truncating stores as legal, adds
the patterns to select them, and updates the affected tests.

Patch by David Sherwood.

Differential Revision: https://reviews.llvm.org/D63839

llvm-svn: 364636
This commit is contained in:
David Green 2019-06-28 09:47:55 +00:00
parent 29ff1b4f46
commit eb7080ac6e
8 changed files with 467 additions and 1016 deletions

View File

@ -226,6 +226,13 @@ void ARMTargetLowering::setAllExpand(MVT VT) {
setOperationAction(Opc, VT, Expand);
}
/// Set \p Action for every extending-load kind (EXTLOAD, ZEXTLOAD and
/// SEXTLOAD) on the type pair (\p From, \p To). Both types are forwarded to
/// setLoadExtAction unchanged and in the order given.
void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
                                       LegalizeAction Action) {
  // Same order as the hand-written sequence this helper replaces.
  const unsigned ExtKinds[] = { ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD };
  for (const unsigned ExtKind : ExtKinds)
    setLoadExtAction(ExtKind, From, To, Action);
}
void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
@ -277,6 +284,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
}
// It is legal to extload from v8i8 to v8i16, and from v4i16 or v4i8 to v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
// Some truncating stores are legal too.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
@ -587,9 +604,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
for (MVT VT : MVT::vector_valuetypes()) {
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
addAllExtLoads(VT, InnerVT, Expand);
}
setOperationAction(ISD::MULHS, VT, Expand);
@ -13197,7 +13212,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
Ty != MVT::v2f64)
Ty != MVT::v2f64 &&
// These are for truncated stores
Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
return false;
if (Subtarget->isLittle()) {

View File

@ -814,6 +814,7 @@ class VectorType;
MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const;
void addMVEVectorTypes(bool HasMVEFP);
void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
void setAllExpand(MVT VT);
};

View File

@ -4201,6 +4201,42 @@ let Predicates = [HasMVEInt, IsBE] in {
def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
}
// Widening/Narrowing Loads/Stores
// Truncating (narrowing) stores: select MVE_VSTRB16 / MVE_VSTRB32 /
// MVE_VSTRH32 for truncstorevi8 of a v8i16 or v4i32 value and for
// truncstorevi16 of a v4i32 value.
// NOTE(review): t2addrmode_imm7 is instantiated with 1 for the byte stores and
// 2 for the halfword store, while the 32-bit load pattern earlier in this file
// passes 2 as its last argument. If that argument is log2 of the access size,
// 0 and 1 would be expected here - confirm against the operand definitions of
// MVE_VSTRB16/MVE_VSTRB32/MVE_VSTRH32.
let Predicates = [HasMVEInt] in {
def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr),
(MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
(MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr),
(MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>;
}
// Emits the three extending-load selection patterns for one combination of
// destination vector type and source element width.
//   DestLanes, DestElemBits : build the result type v<DestLanes>i<DestElemBits>
//                             and the size suffix of the selected instruction.
//   SrcElemBits             : picks the load fragment
//                             (ext|zext|sext)loadvi<SrcElemBits>.
//   SrcElemType             : letter spliced into the instruction name
//                             MVE_VLDR<SrcElemType>{U,S}<DestElemBits>.
//   am                      : addressing-mode operand used for $addr.
// Generated records:
//   _Any : extloadvi*  -> the "U" instruction
//   _Z   : zextloadvi* -> the "U" instruction
//   _S   : sextloadvi* -> the "S" instruction
multiclass MVEExtLoad<string DestLanes, string DestElemBits,
string SrcElemBits, string SrcElemType,
Operand am> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
(!cast<PatFrag>("extloadvi" # SrcElemBits) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
(!cast<PatFrag>("zextloadvi" # SrcElemBits) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
(!cast<PatFrag>("sextloadvi" # SrcElemBits) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
am:$addr)>;
}
// Instantiate the extending-load patterns for i8 -> v4i32, i8 -> v8i16 and
// i16 -> v4i32.
// NOTE(review): the byte loads pass t2addrmode_imm7<1> and the halfword load
// passes t2addrmode_imm7<2>; if the template argument is log2 of the access
// size, <0> and <1> would be expected - confirm against the operand
// definitions of the selected MVE_VLDRB*/MVE_VLDRH* instructions.
let Predicates = [HasMVEInt] in {
defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>;
defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>;
defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>;
}
// Bit convert patterns
let Predicates = [HasMVEInt] in {

View File

@ -115,14 +115,7 @@ entry:
define i8* @post_ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #10]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -138,14 +131,8 @@ entry:
define i8* @post_ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh.w r2, [r0, #5]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh.w r2, [r0, #7]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh.w r2, [r0, #9]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -161,14 +148,8 @@ entry:
define i8* @post_ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r2, r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -184,14 +165,8 @@ entry:
define i8* @post_ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh.w r2, [r0, #254]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh.w r2, [r0, #256]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh.w r2, [r0, #258]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh.w r2, [r0, #260]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: add.w r2, r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -207,14 +182,8 @@ entry:
define i8* @post_ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh.w r2, [r0, #256]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh.w r2, [r0, #258]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh.w r2, [r0, #260]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh.w r2, [r0, #262]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: add.w r2, r0, #256
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -231,18 +200,9 @@ entry:
define i8* @post_ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #4]
; CHECK-NEXT: ldrsh.w r3, [r0, #6]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #8]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #10]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i16>*
@ -256,18 +216,10 @@ entry:
define i8* @post_ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #3]
; CHECK-NEXT: ldrsh.w r3, [r0, #5]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #7]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #9]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i16>*
@ -281,18 +233,10 @@ entry:
define i8* @post_ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #2]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #8]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: adds r2, r0, #2
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 2
%0 = bitcast i8* %z to <4 x i16>*
@ -306,18 +250,10 @@ entry:
define i8* @post_ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #254]
; CHECK-NEXT: ldrsh.w r3, [r0, #256]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #258]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #260]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: add.w r2, r0, #254
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 254
%0 = bitcast i8* %z to <4 x i16>*
@ -331,18 +267,10 @@ entry:
define i8* @post_ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #256]
; CHECK-NEXT: ldrsh.w r3, [r0, #258]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #260]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #262]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: add.w r2, r0, #256
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %z to <4 x i16>*
@ -437,18 +365,9 @@ entry:
define i8* @post_ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #4]
; CHECK-NEXT: ldrb r3, [r0, #5]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #7]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i8>*
@ -462,18 +381,10 @@ entry:
define i8* @post_ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #3]
; CHECK-NEXT: ldrb r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #5]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i8>*
@ -487,18 +398,10 @@ entry:
define i8* @post_ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb.w r2, [r0, #127]
; CHECK-NEXT: ldrb.w r3, [r0, #128]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #129]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #130]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <4 x i8>*
@ -512,18 +415,10 @@ entry:
define i8* @post_ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb.w r2, [r0, #128]
; CHECK-NEXT: ldrb.w r3, [r0, #129]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #130]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #131]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <4 x i8>*
@ -538,14 +433,7 @@ entry:
define i8* @post_ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -561,14 +449,8 @@ entry:
define i8* @post_ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -584,14 +466,8 @@ entry:
define i8* @post_ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #127]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #128]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #129]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #130]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -607,14 +483,8 @@ entry:
define i8* @post_ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #128]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #129]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #130]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #131]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -631,26 +501,9 @@ entry:
define i8* @post_ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #4]
; CHECK-NEXT: ldrb r3, [r0, #5]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #6]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #7]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb r2, [r0, #8]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb r2, [r0, #9]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #10]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #11]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x i8>*
@ -664,26 +517,10 @@ entry:
define i8* @post_ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #3]
; CHECK-NEXT: ldrb r3, [r0, #4]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #5]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #6]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb r2, [r0, #8]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #9]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #10]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i8>*
@ -697,26 +534,10 @@ entry:
define i8* @post_ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb.w r2, [r0, #127]
; CHECK-NEXT: ldrb.w r3, [r0, #128]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #129]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #130]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb.w r2, [r0, #131]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb.w r2, [r0, #132]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb.w r2, [r0, #133]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb.w r2, [r0, #134]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <8 x i8>*
@ -730,26 +551,10 @@ entry:
define i8* @post_ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb.w r2, [r0, #128]
; CHECK-NEXT: ldrb.w r3, [r0, #129]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #130]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #131]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb.w r2, [r0, #132]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb.w r2, [r0, #133]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb.w r2, [r0, #134]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb.w r2, [r0, #135]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <8 x i8>*
@ -764,22 +569,7 @@ entry:
define i8* @post_ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #8]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #9]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #10]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #11]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -795,22 +585,8 @@ entry:
define i8* @post_ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #8]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #9]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #10]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -826,22 +602,8 @@ entry:
define i8* @post_ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #127]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #128]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #129]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #130]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #131]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #132]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #133]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #134]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -857,22 +619,8 @@ entry:
define i8* @post_ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0, #128]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #129]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #130]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #131]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #132]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #133]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #134]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #135]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -1096,8 +844,8 @@ entry:
define i8* @post_strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0, #4]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1111,9 +859,9 @@ entry:
define i8* @post_strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str.w r1, [r0, #3]
; CHECK-NEXT: str.w r2, [r0, #7]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1127,9 +875,9 @@ entry:
define i8* @post_strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str.w r1, [r0, #2]
; CHECK-NEXT: str.w r2, [r0, #6]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #2
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -1143,9 +891,9 @@ entry:
define i8* @post_strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str.w r1, [r0, #254]
; CHECK-NEXT: str.w r2, [r0, #258]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #254
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1159,8 +907,9 @@ entry:
define i8* @post_strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0, #256]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #256
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 256
@ -1255,8 +1004,8 @@ entry:
define i8* @post_strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0, #4]
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1270,8 +1019,9 @@ entry:
define i8* @post_strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str.w r1, [r0, #3]
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1285,8 +1035,9 @@ entry:
define i8* @post_strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str.w r1, [r0, #127]
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #127
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1300,8 +1051,9 @@ entry:
define i8* @post_strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str.w r1, [r0, #128]
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
@ -1316,8 +1068,8 @@ entry:
define i8* @post_strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0, #4]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1331,9 +1083,9 @@ entry:
define i8* @post_strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str.w r1, [r0, #3]
; CHECK-NEXT: str.w r2, [r0, #7]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1347,9 +1099,9 @@ entry:
define i8* @post_strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str.w r1, [r0, #127]
; CHECK-NEXT: str.w r2, [r0, #131]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #127
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1363,8 +1115,9 @@ entry:
define i8* @post_strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0, #128]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128

View File

@ -117,16 +117,8 @@ entry:
define i8* @post_ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldrh r3, [r2, #4]!
; CHECK-NEXT: ldrh r0, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -142,15 +134,8 @@ entry:
define i8* @post_ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -166,16 +151,8 @@ entry:
define i8* @post_ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldrh r3, [r2, #2]!
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrh r3, [r0, #4]
; CHECK-NEXT: ldrh r0, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -191,15 +168,8 @@ entry:
define i8* @post_ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -215,15 +185,8 @@ entry:
define i8* @post_ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -240,14 +203,8 @@ entry:
define i8* @post_ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh.w r2, [r0]
; CHECK-NEXT: ldrsh.w r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: ldrsh r2, [r0, #4]!
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -263,19 +220,10 @@ entry:
define i8* @post_ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0]
; CHECK-NEXT: ldrsh.w r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x i16>*
@ -289,14 +237,8 @@ entry:
define i8* @post_ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh.w r2, [r0]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh r2, [r0, #2]!
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -312,19 +254,10 @@ entry:
define i8* @post_ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0]
; CHECK-NEXT: ldrsh.w r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 254
%0 = bitcast i8* %x to <4 x i16>*
@ -338,19 +271,10 @@ entry:
define i8* @post_ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0]
; CHECK-NEXT: ldrsh.w r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %x to <4 x i16>*
@ -446,19 +370,10 @@ entry:
define i8* @post_ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %x to <4 x i8>*
@ -472,14 +387,8 @@ entry:
define i8* @post_ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -495,19 +404,10 @@ entry:
define i8* @post_ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %x to <4 x i8>*
@ -521,19 +421,10 @@ entry:
define i8* @post_ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %x to <4 x i8>*
@ -548,15 +439,8 @@ entry:
define i8* @post_ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -572,14 +456,8 @@ entry:
define i8* @post_ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -595,15 +473,8 @@ entry:
define i8* @post_ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -619,15 +490,8 @@ entry:
define i8* @post_ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -644,28 +508,10 @@ entry:
define i8* @post_ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb r3, [r2, #4]!
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r3
; CHECK-NEXT: ldrb r3, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r3
; CHECK-NEXT: ldrb r3, [r0, #6]
; CHECK-NEXT: ldrb r0, [r0, #7]
; CHECK-NEXT: vmov.16 q0[6], r3
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %x to <8 x i8>*
@ -679,28 +525,10 @@ entry:
define i8* @post_ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb r3, [r2, #3]!
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb.w lr, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], r3
; CHECK-NEXT: ldrb r3, [r0, #5]
; CHECK-NEXT: vmov.16 q0[4], lr
; CHECK-NEXT: vmov.16 q0[5], r3
; CHECK-NEXT: ldrb r3, [r0, #6]
; CHECK-NEXT: ldrb r0, [r0, #7]
; CHECK-NEXT: vmov.16 q0[6], r3
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x i8>*
@ -714,27 +542,10 @@ entry:
define i8* @post_ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %x to <8 x i8>*
@ -748,27 +559,10 @@ entry:
define i8* @post_ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r12
; CHECK-NEXT: ldrb r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], lr
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %x to <8 x i8>*
@ -783,24 +577,8 @@ entry:
define i8* @post_ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldrsb r3, [r2, #4]!
; CHECK-NEXT: vmov.16 q0[4], r3
; CHECK-NEXT: ldrsb.w r3, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r3
; CHECK-NEXT: ldrsb.w r3, [r0, #6]
; CHECK-NEXT: ldrsb.w r0, [r0, #7]
; CHECK-NEXT: vmov.16 q0[6], r3
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -816,24 +594,8 @@ entry:
define i8* @post_ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldrsb r3, [r2, #3]!
; CHECK-NEXT: vmov.16 q0[3], r3
; CHECK-NEXT: ldrsb.w r3, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r3
; CHECK-NEXT: ldrsb.w r3, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r3
; CHECK-NEXT: ldrsb.w r3, [r0, #6]
; CHECK-NEXT: ldrsb.w r0, [r0, #7]
; CHECK-NEXT: vmov.16 q0[6], r3
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -849,23 +611,8 @@ entry:
define i8* @post_ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -881,23 +628,8 @@ entry:
define i8* @post_ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb.w r2, [r0]
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -1127,9 +859,9 @@ entry:
define i8* @post_strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: str r2, [r0, #4]!
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1143,8 +875,8 @@ entry:
define i8* @post_strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: bx lr
entry:
@ -1159,8 +891,8 @@ entry:
define i8* @post_strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: bx lr
entry:
@ -1175,8 +907,8 @@ entry:
define i8* @post_strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: bx lr
entry:
@ -1191,8 +923,8 @@ entry:
define i8* @post_strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: bx lr
entry:
@ -1289,8 +1021,9 @@ entry:
define i8* @post_strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0], #4
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1304,8 +1037,9 @@ entry:
define i8* @post_strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0], #3
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1319,8 +1053,9 @@ entry:
define i8* @post_strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0], #127
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1334,8 +1069,9 @@ entry:
define i8* @post_strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0], #128
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
@ -1350,9 +1086,9 @@ entry:
define i8* @post_strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: str r2, [r0, #4]!
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1366,8 +1102,8 @@ entry:
define i8* @post_strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: bx lr
entry:
@ -1382,8 +1118,8 @@ entry:
define i8* @post_strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: bx lr
entry:
@ -1398,8 +1134,8 @@ entry:
define i8* @post_strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: bx lr
entry:

View File

@ -117,14 +117,8 @@ entry:
define i8* @post_ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #4]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -140,14 +134,8 @@ entry:
define i8* @post_ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -163,14 +151,8 @@ entry:
define i8* @post_ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #2]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -186,14 +168,8 @@ entry:
define i8* @post_ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #254]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -209,15 +185,8 @@ entry:
define i8* @post_ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh.w r2, [r0, #256]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrh.w r2, [r0, #258]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrh.w r2, [r0, #260]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrh.w r2, [r0, #262]
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -234,14 +203,8 @@ entry:
define i8* @post_ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh r2, [r0, #4]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r2, [r0, #2]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -257,14 +220,8 @@ entry:
define i8* @post_ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r2, [r0, #2]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -280,14 +237,8 @@ entry:
define i8* @post_ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh r2, [r0, #2]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r2, [r0, #2]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -303,14 +254,8 @@ entry:
define i8* @post_ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh r2, [r0, #254]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r2, [r0, #2]
; CHECK-NEXT: ldrsh.w r3, [r0, #4]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #6]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -326,19 +271,10 @@ entry:
define i8* @post_ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrsh.w r2, [r0, #256]
; CHECK-NEXT: ldrsh.w r3, [r0, #258]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsh.w r12, [r0, #260]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrsh.w lr, [r0, #262]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %z to <4 x i16>*
@ -434,14 +370,8 @@ entry:
define i8* @post_ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0, #4]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -457,14 +387,8 @@ entry:
define i8* @post_ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -480,14 +404,8 @@ entry:
define i8* @post_ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0, #127]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -503,14 +421,8 @@ entry:
define i8* @post_ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0, #128]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -527,14 +439,8 @@ entry:
define i8* @post_ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #4]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -550,14 +456,8 @@ entry:
define i8* @post_ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #3]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -573,14 +473,8 @@ entry:
define i8* @post_ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #127]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -596,14 +490,8 @@ entry:
define i8* @post_ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #128]!
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -620,26 +508,10 @@ entry:
define i8* @post_ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #4]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r3
; CHECK-NEXT: ldrb.w lr, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], r12
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[4], lr
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x i8>*
@ -653,26 +525,10 @@ entry:
define i8* @post_ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #3]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r3
; CHECK-NEXT: ldrb.w lr, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], r12
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[4], lr
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i8>*
@ -686,26 +542,10 @@ entry:
define i8* @post_ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #127]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r3
; CHECK-NEXT: ldrb.w lr, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], r12
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[4], lr
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <8 x i8>*
@ -719,26 +559,10 @@ entry:
define i8* @post_ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrb r2, [r0, #128]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrb r2, [r0, #1]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrb.w r12, [r0, #3]
; CHECK-NEXT: vmov.16 q0[2], r3
; CHECK-NEXT: ldrb.w lr, [r0, #4]
; CHECK-NEXT: vmov.16 q0[3], r12
; CHECK-NEXT: ldrb r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[4], lr
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrb r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrb r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <8 x i8>*
@ -753,22 +577,8 @@ entry:
define i8* @post_ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #4]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -784,22 +594,8 @@ entry:
define i8* @post_ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #3]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -815,22 +611,8 @@ entry:
define i8* @post_ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #127]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -846,22 +628,8 @@ entry:
define i8* @post_ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsb r2, [r0, #128]!
; CHECK-NEXT: vmov.16 q0[0], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #1]
; CHECK-NEXT: vmov.16 q0[1], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #2]
; CHECK-NEXT: vmov.16 q0[2], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #3]
; CHECK-NEXT: vmov.16 q0[3], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #4]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #5]
; CHECK-NEXT: vmov.16 q0[5], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #6]
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: ldrsb.w r2, [r0, #7]
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -1091,9 +859,9 @@ entry:
define i8* @post_strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #4]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1107,9 +875,9 @@ entry:
define i8* @post_strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #3]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1123,9 +891,9 @@ entry:
define i8* @post_strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #2]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -1139,9 +907,9 @@ entry:
define i8* @post_strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #254]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1155,9 +923,9 @@ entry:
define i8* @post_strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: strd r1, r2, [r0, #256]
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 256
@ -1253,8 +1021,9 @@ entry:
define i8* @post_strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0, #4]!
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1268,8 +1037,9 @@ entry:
define i8* @post_strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0, #3]!
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1283,8 +1053,9 @@ entry:
define i8* @post_strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0, #127]!
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1298,8 +1069,9 @@ entry:
define i8* @post_strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: str r1, [r0, #128]!
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
@ -1314,9 +1086,9 @@ entry:
define i8* @post_strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #4]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1330,9 +1102,9 @@ entry:
define i8* @post_strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #3]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1346,9 +1118,9 @@ entry:
define i8* @post_strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #127]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1362,9 +1134,9 @@ entry:
define i8* @post_strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r2, [r1]
; CHECK-NEXT: str r1, [r0, #128]!
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128

View File

@ -494,15 +494,24 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: movs r0, #7
; CHECK-NEXT: movs r1, #1
; CHECK-NEXT: strh.w r0, [sp, #2]
; CHECK-NEXT: adr r1, .LCPI30_0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: strh.w r0, [sp]
; CHECK-NEXT: movt r1, #9
; CHECK-NEXT: ldr r0, [sp]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: vmov.f32 s1, s5
; CHECK-NEXT: vmov.f32 s2, s6
; CHECK-NEXT: vmov.f32 s3, s7
; CHECK-NEXT: vstrh.32 q0, [r2]
; CHECK-NEXT: ldrd r0, r1, [sp], #8
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI30_0:
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .long 7 @ 0x7
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 9 @ 0x9
entry:
%f = shufflevector <8 x i16> %v, <8 x i16> <i16 undef, i16 7, i16 1, i16 9, i16 undef, i16 undef, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 9, i32 10, i32 11>
%0 = bitcast <4 x i16> %f to i64

View File

@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
; Truncating store, i32 -> i8 lanes: loads a full <4 x i32> from %src,
; truncates each lane to i8 and stores the <4 x i8> result to %dest.
; The expected output is a plain 32-bit vector load followed by a single
; narrowing byte store (vstrb.32), with no per-lane scalar code.
; %n is not referenced by the body.
define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i32>, <4 x i32>* %src, align 4
; The trunc feeds only the store, so the pair is expected to select as one
; truncating store.
%0 = trunc <4 x i32> %wide.load to <4 x i8>
store <4 x i8> %0, <4 x i8>* %dest, align 1
ret void
}
; Truncating store, i32 -> i16 lanes: loads a full <4 x i32> from %src,
; truncates each lane to i16 and stores the <4 x i16> result to %dest.
; The expected output is a plain 32-bit vector load followed by a single
; narrowing halfword store (vstrh.32).
; %n is not referenced by the body.
define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i32>, <4 x i32>* %src, align 4
; The trunc feeds only the store, so the pair is expected to select as one
; truncating store.
%0 = trunc <4 x i32> %wide.load to <4 x i16>
store <4 x i16> %0, <4 x i16>* %dest, align 2
ret void
}
; Truncating store, i16 -> i8 lanes: loads a full <8 x i16> from %src,
; truncates each lane to i8 and stores the <8 x i8> result to %dest.
; The expected output is a plain 16-bit vector load followed by a single
; narrowing byte store (vstrb.16).
; %n is not referenced by the body.
define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <8 x i16>, <8 x i16>* %src, align 2
; The trunc feeds only the store, so the pair is expected to select as one
; truncating store.
%0 = trunc <8 x i16> %wide.load to <8 x i8>
store <8 x i8> %0, <8 x i8>* %dest, align 1
ret void
}
; Sign-extending load, i8 -> i32 lanes: loads a <4 x i8> from %src,
; sign-extends each lane to i32 and stores the <4 x i32> result to %dest.
; The expected output is a single widening signed byte load (vldrb.s32)
; followed by a plain 32-bit vector store.
; %n is not referenced by the body.
define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i8>, <4 x i8>* %src, align 1
; The load feeds only the sext, so the pair is expected to select as one
; sign-extending load.
%0 = sext <4 x i8> %wide.load to <4 x i32>
store <4 x i32> %0, <4 x i32>* %dest, align 4
ret void
}
; Sign-extending load, i8 -> i16 lanes: loads an <8 x i8> from %src,
; sign-extends each lane to i16 and stores the <8 x i16> result to %dest.
; The expected output is a single widening signed byte load (vldrb.s16)
; followed by a plain 16-bit vector store.
; %n is not referenced by the body.
define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <8 x i8>, <8 x i8>* %src, align 1
; The load feeds only the sext, so the pair is expected to select as one
; sign-extending load.
%0 = sext <8 x i8> %wide.load to <8 x i16>
store <8 x i16> %0, <8 x i16>* %dest, align 2
ret void
}
; Sign-extending load, i16 -> i32 lanes: loads a <4 x i16> from %src,
; sign-extends each lane to i32 and stores the <4 x i32> result to %dest.
; The expected output is a single widening signed halfword load (vldrh.s32)
; followed by a plain 32-bit vector store.
; %n is not referenced by the body.
define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i16>, <4 x i16>* %src, align 2
; The load feeds only the sext, so the pair is expected to select as one
; sign-extending load.
%0 = sext <4 x i16> %wide.load to <4 x i32>
store <4 x i32> %0, <4 x i32>* %dest, align 4
ret void
}
; Zero-extending load, i8 -> i32 lanes: loads a <4 x i8> from %src,
; zero-extends each lane to i32 and stores the <4 x i32> result to %dest.
; The expected output is a single widening unsigned byte load (vldrb.u32)
; followed by a plain 32-bit vector store.
; %n is not referenced by the body.
define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i8>, <4 x i8>* %src, align 1
; The load feeds only the zext, so the pair is expected to select as one
; zero-extending load.
%0 = zext <4 x i8> %wide.load to <4 x i32>
store <4 x i32> %0, <4 x i32>* %dest, align 4
ret void
}
; Zero-extending load, i8 -> i16 lanes: loads an <8 x i8> from %src,
; zero-extends each lane to i16 and stores the <8 x i16> result to %dest.
; The expected output is a single widening unsigned byte load (vldrb.u16)
; followed by a plain 16-bit vector store.
; %n is not referenced by the body.
define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint16_uint8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <8 x i8>, <8 x i8>* %src, align 1
; The load feeds only the zext, so the pair is expected to select as one
; zero-extending load.
%0 = zext <8 x i8> %wide.load to <8 x i16>
store <8 x i16> %0, <8 x i16>* %dest, align 2
ret void
}
; Zero-extending load, i16 -> i32 lanes: loads a <4 x i16> from %src,
; zero-extends each lane to i32 and stores the <4 x i32> result to %dest.
; The expected output is a single widening unsigned halfword load (vldrh.u32)
; followed by a plain 32-bit vector store.
; %n is not referenced by the body.
define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%wide.load = load <4 x i16>, <4 x i16>* %src, align 2
; The load feeds only the zext, so the pair is expected to select as one
; zero-extending load.
%0 = zext <4 x i16> %wide.load to <4 x i32>
store <4 x i32> %0, <4 x i32>* %dest, align 4
ret void
}