forked from OSchip/llvm-project
[ARM] Fix loads and stores for predicate vectors
These predicate vectors can usually be loaded and stored with a single instruction, a VSTR_P0. However, this instruction will store the entire P0 predicate, 16 bits, zero-extended to 32 bits, with each lane of the v4i1/v8i1/v16i1 represented by 4/2/1 bits. As far as I understand, when llvm says "store this v4i1", it really does need to store 4 bits (or 8, that being the size of a byte, with the bottom 4 as the interesting bits). For example, a bitcast from a v8i1 to an i8 is defined as a store followed by a load, which is how the code is expanded. So this instead lowers the v4i1/v8i1 load/store through some shuffles to get the bits into the correct positions. This, as you might imagine, is not as efficient as a single instruction, but I believe it is needed for correctness. v16i1 equally should not load/store 32 bits; it should only store the 16 bits of data. Stack loads/stores are still using the VSTR_P0 (as can be seen by the test not changing). This is fine as they are self-consistent; it is only "externally observable loads/stores" (from our point of view) that need to be corrected. Differential revision: https://reviews.llvm.org/D67085 llvm-svn: 371419
This commit is contained in:
parent
63e6d8db1c
commit
2b7089949e
|
@ -378,6 +378,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
|
|||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
|
||||
setOperationAction(ISD::LOAD, VT, Custom);
|
||||
setOperationAction(ISD::STORE, VT, Custom);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8783,6 +8785,65 @@ void ARMTargetLowering::ExpandDIV_Windows(
|
|||
Results.push_back(Upper);
|
||||
}
|
||||
|
||||
// Custom lowering for a load whose memory type is an MVE predicate vector
// (v4i1, v8i1 or v16i1). The load is rewritten as an any-extending integer
// load of exactly MemVT.getSizeInBits() bits into an i32, which is then
// reinterpreted as a predicate. Returns the merged pair {predicate value,
// second result of the new load}.
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
|
||||
EVT MemVT = LD->getMemoryVT();
|
||||
// Only plain (non-extending, unindexed) loads of the three predicate types
// are handled here; anything else trips an assertion.
assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
|
||||
"Expected a predicate type!");
|
||||
assert(MemVT == Op.getValueType());
|
||||
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
||||
"Expected a non-extending load");
|
||||
assert(LD->isUnindexed() && "Expected a unindexed load");
|
||||
|
||||
// The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16-bit
|
||||
// predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
|
||||
// need to make sure that only 4/8 bits are actually loaded, and into the
|
||||
// correct place, which means loading the value as a scalar and then moving
|
||||
// it into the bottom bits of the predicate.
|
||||
// Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
|
||||
// for BE).
|
||||
|
||||
SDLoc dl(Op);
|
||||
// Scalar load whose memory type is an integer exactly as wide as the
// predicate vector (MemVT.getSizeInBits() bits); the in-register result is
// an i32. The original chain, base pointer and memory operand are reused.
SDValue Load = DAG.getExtLoad(
|
||||
ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
|
||||
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
|
||||
LD->getMemOperand());
|
||||
// Reinterpret the loaded i32 as a full 16-lane predicate.
SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load);
|
||||
// For v4i1/v8i1, keep only the low lanes (subvector starting at index 0).
if (MemVT != MVT::v16i1)
|
||||
Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
|
||||
DAG.getConstant(0, dl, MVT::i32));
|
||||
// Replace both results of the original load: the value, and result #1 of the
// new load (presumably its output chain, as for any load node).
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
|
||||
}
|
||||
|
||||
// Custom lowering for a store whose memory type is an MVE predicate vector
// (v4i1, v8i1 or v16i1). The predicate is widened to v16i1 if necessary, cast
// to an i32, and written with a truncating scalar store of exactly
// MemVT.getSizeInBits() bits. Returns the new store node.
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
|
||||
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
|
||||
EVT MemVT = ST->getMemoryVT();
|
||||
// Only plain (non-truncating, unindexed) stores of the three predicate types
// are handled here; anything else trips an assertion.
assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
|
||||
"Expected a predicate type!");
|
||||
assert(MemVT == ST->getValue().getValueType());
|
||||
// NOTE(review): the condition checks isTruncatingStore() but the message
// says "non-extending"; "non-truncating" looks like the intended wording.
assert(!ST->isTruncatingStore() && "Expected a non-extending store");
|
||||
assert(ST->isUnindexed() && "Expected a unindexed store");
|
||||
|
||||
// Only store the v4i1 or v8i1 worth of bits, via a buildvector whose top
|
||||
// lanes are undef and a truncating scalar store.
|
||||
SDLoc dl(Op);
|
||||
SDValue Build = ST->getValue();
|
||||
if (MemVT != MVT::v16i1) {
|
||||
SmallVector<SDValue, 16> Ops;
|
||||
// Lanes [0, NumElements): copy each lane of the stored value, extracted as
// an i32.
for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++)
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
|
||||
DAG.getConstant(I, dl, MVT::i32)));
|
||||
// Lanes [NumElements, 16): pad with undef. These lanes sit above the
// MemVT.getSizeInBits() bits that the truncating store below writes.
for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
|
||||
Ops.push_back(DAG.getUNDEF(MVT::i32));
|
||||
Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
|
||||
}
|
||||
// Reinterpret the 16-lane predicate as an i32 so it can be stored as a scalar.
SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
|
||||
// Truncating store to an integer memory type exactly as wide as the original
// predicate vector, reusing the original chain, base pointer and memory
// operand.
return DAG.getTruncStore(
|
||||
ST->getChain(), dl, GRP, ST->getBasePtr(),
|
||||
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
|
||||
ST->getMemOperand());
|
||||
}
|
||||
|
||||
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
|
||||
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
|
||||
// Acquire/Release load/store is not legal for targets without a dmb or
|
||||
|
@ -8982,6 +9043,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::UADDO:
|
||||
case ISD::USUBO:
|
||||
return LowerUnsignedALUO(Op, DAG);
|
||||
case ISD::LOAD:
|
||||
return LowerPredicateLoad(Op, DAG);
|
||||
case ISD::STORE:
|
||||
return LowerPredicateStore(Op, DAG);
|
||||
case ISD::ATOMIC_LOAD:
|
||||
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
|
||||
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
|
||||
|
|
|
@ -4999,24 +4999,6 @@ let Predicates = [HasMVEInt, IsBE] in {
|
|||
def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
|
||||
}
|
||||
|
||||
// Selection patterns, active when HasMVEInt holds, that map a load or store
// of any predicate vector type (v16i1, v8i1, v4i1) at a t2addrmode_imm7<2>
// address directly onto VLDR_P0_off / VSTR_P0_off. All three element counts
// select the same instruction, so no distinction is made between them here.
let Predicates = [HasMVEInt] in {
|
||||
// Predicate loads
|
||||
def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
|
||||
// Predicate stores
|
||||
// The stored value comes from the VCCR register class in every case.
def : Pat<(store (v4i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
|
||||
(VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
|
||||
def : Pat<(store (v8i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
|
||||
(VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
|
||||
def : Pat<(store (v16i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
|
||||
(VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
|
||||
}
|
||||
|
||||
|
||||
// Widening/Narrowing Loads/Stores
|
||||
|
||||
|
|
|
@ -8,11 +8,23 @@ define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrne r3, [r2]
|
||||
|
@ -29,9 +41,21 @@ define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrmi r1, [r2, #12]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -64,11 +88,23 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrbne r3, [r2]
|
||||
|
@ -85,11 +121,23 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrbmi r1, [r2, #3]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -123,11 +171,23 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrhne r3, [r2]
|
||||
|
@ -144,10 +204,22 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrhmi r1, [r2, #6]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -181,12 +253,24 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrbne r3, [r2]
|
||||
|
@ -203,10 +287,22 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrbmi r1, [r2, #3]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -240,11 +336,23 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrhne r3, [r2]
|
||||
|
@ -261,10 +369,22 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrhmi r1, [r2, #6]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -298,12 +418,36 @@ define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src
|
|||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #8]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #2, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #4, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #10, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #5, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #6, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #14, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrhne r3, [r2]
|
||||
; CHECK-NEXT: vmovne.16 q0[0], r3
|
||||
|
@ -335,10 +479,34 @@ define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrhmi r1, [r2, #14]
|
||||
; CHECK-NEXT: vmovmi.16 q0[7], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: and r3, r1, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #6, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #3, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #10, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #5, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #12, #1
|
||||
; CHECK-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r2, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne.u16 r2, q0[0]
|
||||
; CHECK-NEXT: strhne r2, [r0]
|
||||
|
@ -386,12 +554,36 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
|
|||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #8]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #2, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #4, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #10, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #5, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #6, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #14, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrbne r3, [r2]
|
||||
; CHECK-NEXT: vmovne.16 q0[0], r3
|
||||
|
@ -423,11 +615,35 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrbmi r1, [r2, #7]
|
||||
; CHECK-NEXT: vmovmi.16 q0[7], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: and r3, r1, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #6, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #3, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #10, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #5, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #12, #1
|
||||
; CHECK-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r2, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne.u16 r2, q0[0]
|
||||
; CHECK-NEXT: strhne r2, [r0]
|
||||
|
@ -476,12 +692,36 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
|
|||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #8]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #2, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #4, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #10, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #5, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #6, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #14, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrbne r3, [r2]
|
||||
; CHECK-NEXT: vmovne.16 q0[0], r3
|
||||
|
@ -513,11 +753,35 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrbmi r1, [r2, #7]
|
||||
; CHECK-NEXT: vmovmi.16 q0[7], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: vmovlb.u8 q0, q0
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: and r3, r1, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #6, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #3, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #10, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #5, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #12, #1
|
||||
; CHECK-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r2, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne.u16 r2, q0[0]
|
||||
; CHECK-NEXT: strhne r2, [r0]
|
||||
|
@ -573,13 +837,12 @@ define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src
|
|||
; CHECK-NEXT: bfc r4, #0, #4
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #16
|
||||
; CHECK-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-NEXT: vcmp.s8 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrh.w r1, [sp, #16]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r3, p0
|
||||
; CHECK-NEXT: uxth r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrbne r3, [r2]
|
||||
; CHECK-NEXT: vmovne.8 q0[0], r3
|
||||
|
@ -643,10 +906,9 @@ define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrbmi r1, [r2, #15]
|
||||
; CHECK-NEXT: vmovmi.8 q0[15], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrh.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: uxth r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne.u8 r2, q0[0]
|
||||
; CHECK-NEXT: strbne r2, [r0]
|
||||
|
@ -726,12 +988,36 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *
|
|||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #8]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #2, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #4, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #10, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #5, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #6, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #14, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrhne r3, [r2]
|
||||
; CHECK-NEXT: vmovne.16 q0[0], r3
|
||||
|
@ -763,10 +1049,34 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrhmi r1, [r2, #14]
|
||||
; CHECK-NEXT: vmovmi.16 q0[7], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: and r3, r1, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #6, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #3, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #10, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #5, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #12, #1
|
||||
; CHECK-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r2, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne.u16 r2, q0[0]
|
||||
; CHECK-NEXT: strbne r2, [r0]
|
||||
|
@ -815,11 +1125,23 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrne r3, [r2]
|
||||
|
@ -836,9 +1158,21 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrmi r1, [r2, #12]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -872,11 +1206,23 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32>
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: ldrne r3, [r2]
|
||||
|
@ -893,9 +1239,21 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32>
|
|||
; CHECK-NEXT: itt mi
|
||||
; CHECK-NEXT: ldrmi r1, [r2, #12]
|
||||
; CHECK-NEXT: vmovmi.32 q0[3], r1
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: vmovne r2, s0
|
||||
|
@ -929,11 +1287,23 @@ define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *
|
|||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #4
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #4]
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: and r1, r3, #15
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: vldrne s0, [r2]
|
||||
|
@ -946,9 +1316,21 @@ define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *
|
|||
; CHECK-NEXT: lsls r1, r1, #28
|
||||
; CHECK-NEXT: it mi
|
||||
; CHECK-NEXT: vldrmi s3, [r2, #12]
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: and r3, r2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-NEXT: and r1, r1, #15
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: vstrne s0, [r0]
|
||||
|
@ -977,12 +1359,36 @@ define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%s
|
|||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1]
|
||||
; CHECK-NEXT: add r3, sp, #8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: @ implicit-def: $q0
|
||||
; CHECK-NEXT: vstr p0, [r3]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp, #8]
|
||||
; CHECK-NEXT: lsls r3, r1, #31
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r1, r12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #2, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #3, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #8, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #4, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #10, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #5, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #12, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #6, #1
|
||||
; CHECK-NEXT: ubfx r1, r12, #14, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r3, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r3
|
||||
; CHECK-NEXT: lsls r3, r3, #31
|
||||
; CHECK-NEXT: bne .LBB13_18
|
||||
; CHECK-NEXT: @ %bb.1: @ %else
|
||||
; CHECK-NEXT: lsls r3, r1, #30
|
||||
|
@ -1010,10 +1416,34 @@ define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%s
|
|||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: .LBB13_9: @ %else20
|
||||
; CHECK-NEXT: mov r1, sp
|
||||
; CHECK-NEXT: vstr p0, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [sp]
|
||||
; CHECK-NEXT: lsls r2, r1, #31
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: and r3, r1, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #2, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #1, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #4, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #6, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #3, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #10, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #5, #1
|
||||
; CHECK-NEXT: ubfx r3, r1, #12, #1
|
||||
; CHECK-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r2, r3, #6, #1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r2, r1, #7, #1
|
||||
; CHECK-NEXT: uxtb r1, r2
|
||||
; CHECK-NEXT: lsls r2, r2, #31
|
||||
; CHECK-NEXT: bne .LBB13_25
|
||||
; CHECK-NEXT: @ %bb.10: @ %else23
|
||||
; CHECK-NEXT: lsls r2, r1, #30
|
||||
|
@ -1072,13 +1502,13 @@ define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%s
|
|||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov.16 q0[5], r3
|
||||
; CHECK-NEXT: lsls r3, r1, #25
|
||||
; CHECK-NEXT: bpl .LBB13_7
|
||||
; CHECK-NEXT: bpl.w .LBB13_7
|
||||
; CHECK-NEXT: .LBB13_24: @ %cond.load16
|
||||
; CHECK-NEXT: vldr.16 s4, [r2, #12]
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: lsls r1, r1, #24
|
||||
; CHECK-NEXT: bmi .LBB13_8
|
||||
; CHECK-NEXT: bmi.w .LBB13_8
|
||||
; CHECK-NEXT: b .LBB13_9
|
||||
; CHECK-NEXT: .LBB13_25: @ %cond.store
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,19 +1,55 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @bitcast_to_v4i1(i4 %b, <4 x i32> %a) {
|
||||
; CHECK-LABEL: bitcast_to_v4i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: and r0, r0, #15
|
||||
; CHECK-NEXT: strb.w r0, [sp]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vldr p0, [r0]
|
||||
; CHECK-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_to_v4i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: and r0, r0, #15
|
||||
; CHECK-LE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_to_v4i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: and r0, r0, #15
|
||||
; CHECK-BE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-BE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = bitcast i4 %b to <4 x i1>
|
||||
%s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
|
||||
|
@ -21,17 +57,70 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
|
||||
; CHECK-LABEL: bitcast_to_v8i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: strb.w r0, [sp]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vldr p0, [r0]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_to_v8i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #8
|
||||
; CHECK-LE-NEXT: sub sp, #8
|
||||
; CHECK-LE-NEXT: uxtb r0, r0
|
||||
; CHECK-LE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q2, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[0]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[1]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[2]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[3]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[4]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[4], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[5]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[6]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[7]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[7], r0
|
||||
; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: add sp, #8
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_to_v8i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #8
|
||||
; CHECK-BE-NEXT: sub sp, #8
|
||||
; CHECK-BE-NEXT: uxtb r0, r0
|
||||
; CHECK-BE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-BE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q2, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[0]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[1]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[2]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[3]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[4]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[4], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[5]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[6]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[7]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[7], r0
|
||||
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
|
||||
; CHECK-BE-NEXT: vrev64.16 q1, q0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vrev32.16 q0, q0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.16 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #8
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = bitcast i8 %b to <8 x i1>
|
||||
%s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
|
||||
|
@ -39,25 +128,46 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
|
||||
; CHECK-LABEL: bitcast_to_v16i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-NEXT: add r7, sp, #8
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: mov r4, sp
|
||||
; CHECK-NEXT: bfc r4, #0, #4
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: strh.w r0, [sp]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-NEXT: vldr p0, [r0]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: pop {r4, r6, r7, pc}
|
||||
; CHECK-LE-LABEL: bitcast_to_v16i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-LE-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-LE-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-LE-NEXT: add r7, sp, #8
|
||||
; CHECK-LE-NEXT: .pad #16
|
||||
; CHECK-LE-NEXT: sub sp, #16
|
||||
; CHECK-LE-NEXT: mov r4, sp
|
||||
; CHECK-LE-NEXT: bfc r4, #0, #4
|
||||
; CHECK-LE-NEXT: mov sp, r4
|
||||
; CHECK-LE-NEXT: uxth r0, r0
|
||||
; CHECK-LE-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: mov sp, r4
|
||||
; CHECK-LE-NEXT: pop {r4, r6, r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_to_v16i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-BE-NEXT: add r7, sp, #8
|
||||
; CHECK-BE-NEXT: .pad #16
|
||||
; CHECK-BE-NEXT: sub sp, #16
|
||||
; CHECK-BE-NEXT: mov r4, sp
|
||||
; CHECK-BE-NEXT: bfc r4, #0, #4
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: vrev64.8 q1, q0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: uxth r0, r0
|
||||
; CHECK-BE-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
|
||||
entry:
|
||||
%c = bitcast i16 %b to <16 x i1>
|
||||
%s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
|
||||
|
@ -65,20 +175,36 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
|
||||
; CHECK-LABEL: bitcast_to_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: and r0, r0, #3
|
||||
; CHECK-NEXT: sbfx r1, r0, #0, #1
|
||||
; CHECK-NEXT: sbfx r0, r0, #1, #1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_to_v2i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: and r0, r0, #3
|
||||
; CHECK-LE-NEXT: sbfx r1, r0, #0, #1
|
||||
; CHECK-LE-NEXT: sbfx r0, r0, #1, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-LE-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-LE-NEXT: vand q0, q0, q1
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_to_v2i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: and r0, r0, #3
|
||||
; CHECK-BE-NEXT: sbfx r1, r0, #0, #1
|
||||
; CHECK-BE-NEXT: sbfx r0, r0, #1, #1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-BE-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: vand q0, q0, q2
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = bitcast i2 %b to <2 x i1>
|
||||
%s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer
|
||||
|
@ -87,16 +213,52 @@ entry:
|
|||
|
||||
|
||||
define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) {
|
||||
; CHECK-LABEL: bitcast_from_v4i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstr p0, [r0]
|
||||
; CHECK-NEXT: ldrb.w r0, [sp]
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_from_v4i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-LE-NEXT: movs r0, #0
|
||||
; CHECK-LE-NEXT: vmrs r1, p0
|
||||
; CHECK-LE-NEXT: and r2, r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #0, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #1, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
|
||||
; CHECK-LE-NEXT: ubfx r1, r1, #12, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #2, #1
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r1, #3, #1
|
||||
; CHECK-LE-NEXT: and r0, r0, #15
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_from_v4i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: movs r3, #0
|
||||
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vmrs r0, p0
|
||||
; CHECK-BE-NEXT: and r2, r0, #1
|
||||
; CHECK-BE-NEXT: ubfx r1, r0, #4, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-BE-NEXT: bfi r3, r2, #0, #1
|
||||
; CHECK-BE-NEXT: bfi r3, r1, #1, #1
|
||||
; CHECK-BE-NEXT: ubfx r1, r0, #8, #1
|
||||
; CHECK-BE-NEXT: ubfx r0, r0, #12, #1
|
||||
; CHECK-BE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-BE-NEXT: bfi r3, r1, #2, #1
|
||||
; CHECK-BE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-BE-NEXT: bfi r3, r0, #3, #1
|
||||
; CHECK-BE-NEXT: and r0, r3, #15
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <4 x i32> %a, zeroinitializer
|
||||
%b = bitcast <4 x i1> %c to i4
|
||||
|
@ -104,16 +266,76 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) {
|
||||
; CHECK-LABEL: bitcast_from_v8i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.i16 eq, q0, zr
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstr p0, [r0]
|
||||
; CHECK-NEXT: ldrb.w r0, [sp]
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_from_v8i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #8
|
||||
; CHECK-LE-NEXT: sub sp, #8
|
||||
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
|
||||
; CHECK-LE-NEXT: movs r0, #0
|
||||
; CHECK-LE-NEXT: vmrs r1, p0
|
||||
; CHECK-LE-NEXT: and r2, r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #0, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #2, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #1, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #2, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #6, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #3, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #4, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #10, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #5, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r1, #12, #1
|
||||
; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r2, #6, #1
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: bfi r0, r1, #7, #1
|
||||
; CHECK-LE-NEXT: uxtb r0, r0
|
||||
; CHECK-LE-NEXT: add sp, #8
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_from_v8i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #8
|
||||
; CHECK-BE-NEXT: sub sp, #8
|
||||
; CHECK-BE-NEXT: vrev64.16 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vmrs r1, p0
|
||||
; CHECK-BE-NEXT: ubfx r0, r1, #2, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r0, #0
|
||||
; CHECK-BE-NEXT: and r0, r1, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r0, #0
|
||||
; CHECK-BE-NEXT: movs r0, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r3, #0, #1
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #1, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #2, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #6, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #3, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #4, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #10, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #5, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #12, #1
|
||||
; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r2, #6, #1
|
||||
; CHECK-BE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-BE-NEXT: bfi r0, r1, #7, #1
|
||||
; CHECK-BE-NEXT: uxtb r0, r0
|
||||
; CHECK-BE-NEXT: add sp, #8
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <8 x i16> %a, zeroinitializer
|
||||
%b = bitcast <8 x i1> %c to i8
|
||||
|
@ -121,24 +343,42 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc i16 @bitcast_from_v16i1(<16 x i8> %a) {
|
||||
; CHECK-LABEL: bitcast_from_v16i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-NEXT: add r7, sp, #8
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: mov r4, sp
|
||||
; CHECK-NEXT: bfc r4, #0, #4
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-NEXT: vcmp.i8 eq, q0, zr
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstr p0, [r0]
|
||||
; CHECK-NEXT: ldrh.w r0, [sp]
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: pop {r4, r6, r7, pc}
|
||||
; CHECK-LE-LABEL: bitcast_from_v16i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-LE-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-LE-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-LE-NEXT: add r7, sp, #8
|
||||
; CHECK-LE-NEXT: .pad #16
|
||||
; CHECK-LE-NEXT: sub sp, #16
|
||||
; CHECK-LE-NEXT: mov r4, sp
|
||||
; CHECK-LE-NEXT: bfc r4, #0, #4
|
||||
; CHECK-LE-NEXT: mov sp, r4
|
||||
; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
|
||||
; CHECK-LE-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-LE-NEXT: vmrs r0, p0
|
||||
; CHECK-LE-NEXT: uxth r0, r0
|
||||
; CHECK-LE-NEXT: mov sp, r4
|
||||
; CHECK-LE-NEXT: pop {r4, r6, r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_from_v16i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-BE-NEXT: add r7, sp, #8
|
||||
; CHECK-BE-NEXT: .pad #16
|
||||
; CHECK-BE-NEXT: sub sp, #16
|
||||
; CHECK-BE-NEXT: mov r4, sp
|
||||
; CHECK-BE-NEXT: bfc r4, #0, #4
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: vrev64.8 q1, q0
|
||||
; CHECK-BE-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vmrs r0, p0
|
||||
; CHECK-BE-NEXT: uxth r0, r0
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
|
||||
entry:
|
||||
%c = icmp eq <16 x i8> %a, zeroinitializer
|
||||
%b = bitcast <16 x i1> %c to i16
|
||||
|
@ -146,25 +386,46 @@ entry:
|
|||
}
|
||||
|
||||
define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
|
||||
; CHECK-LABEL: bitcast_from_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: ands r1, r1, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: mvnne r1, #1
|
||||
; CHECK-NEXT: bfi r1, r0, #0, #1
|
||||
; CHECK-NEXT: and r0, r1, #3
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: bitcast_from_v2i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r0, s1
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: vmov r2, s2
|
||||
; CHECK-LE-NEXT: orrs r0, r1
|
||||
; CHECK-LE-NEXT: vmov r1, s3
|
||||
; CHECK-LE-NEXT: cset r0, eq
|
||||
; CHECK-LE-NEXT: orrs r1, r2
|
||||
; CHECK-LE-NEXT: cset r1, eq
|
||||
; CHECK-LE-NEXT: ands r1, r1, #1
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r1, #1
|
||||
; CHECK-LE-NEXT: bfi r1, r0, #0, #1
|
||||
; CHECK-LE-NEXT: and r0, r1, #3
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: bitcast_from_v2i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov r0, s6
|
||||
; CHECK-BE-NEXT: vmov r1, s7
|
||||
; CHECK-BE-NEXT: vmov r2, s5
|
||||
; CHECK-BE-NEXT: orrs r0, r1
|
||||
; CHECK-BE-NEXT: vmov r1, s4
|
||||
; CHECK-BE-NEXT: cset r0, eq
|
||||
; CHECK-BE-NEXT: orrs r1, r2
|
||||
; CHECK-BE-NEXT: cset r1, eq
|
||||
; CHECK-BE-NEXT: ands r1, r1, #1
|
||||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r1, #1
|
||||
; CHECK-BE-NEXT: bfi r1, r0, #0, #1
|
||||
; CHECK-BE-NEXT: and r0, r1, #3
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %a, zeroinitializer
|
||||
%b = bitcast <2 x i1> %c to i2
|
||||
|
|
|
@ -5,15 +5,41 @@
|
|||
define arm_aapcs_vfpcc <4 x i32> @load_v4i1(<4 x i1> *%src, <4 x i32> %a) {
|
||||
; CHECK-LE-LABEL: load_v4i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldr p0, [r0]
|
||||
; CHECK-LE-NEXT: ldrb r0, [r0]
|
||||
; CHECK-LE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_v4i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vldr p0, [r0]
|
||||
; CHECK-BE-NEXT: ldrb r0, [r0]
|
||||
; CHECK-BE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-BE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
|
@ -27,16 +53,58 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @load_v8i1(<8 x i1> *%src, <8 x i16> %a) {
|
||||
; CHECK-LE-LABEL: load_v8i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldr p0, [r0]
|
||||
; CHECK-LE-NEXT: ldrb r0, [r0]
|
||||
; CHECK-LE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q2, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[0]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[1]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[2]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[3]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[4]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[4], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[5]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[6]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q2[7]
|
||||
; CHECK-LE-NEXT: vmov.16 q1[7], r0
|
||||
; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_v8i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: ldrb r0, [r0]
|
||||
; CHECK-BE-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-BE-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q2, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[0]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[1]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[2]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[3]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[4]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[4], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[5]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[6]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q2[7]
|
||||
; CHECK-BE-NEXT: vmov.16 q1[7], r0
|
||||
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
|
||||
; CHECK-BE-NEXT: vrev64.16 q1, q0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vldr p0, [r0]
|
||||
; CHECK-BE-NEXT: vrev32.16 q0, q0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.16 q0, q1
|
||||
|
@ -50,17 +118,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <16 x i8> @load_v16i1(<16 x i1> *%src, <16 x i8> %a) {
|
||||
; CHECK-LE-LABEL: load_v16i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldr p0, [r0]
|
||||
; CHECK-LE-NEXT: ldrh r0, [r0]
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_v16i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: ldrh r0, [r0]
|
||||
; CHECK-BE-NEXT: vrev64.8 q1, q0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vldr p0, [r0]
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
|
@ -106,14 +176,44 @@ define arm_aapcs_vfpcc void @store_v4i1(<4 x i1> *%dst, <4 x i32> %a) {
|
|||
; CHECK-LE-LABEL: store_v4i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vstr p0, [r0]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: vmrs r2, p0
|
||||
; CHECK-LE-NEXT: and r3, r2, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
|
||||
; CHECK-LE-NEXT: and r1, r1, #15
|
||||
; CHECK-LE-NEXT: strb r1, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_v4i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vstr p0, [r0]
|
||||
; CHECK-BE-NEXT: vmrs r1, p0
|
||||
; CHECK-BE-NEXT: and r3, r1, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: rsb.w r12, r2, #0
|
||||
; CHECK-BE-NEXT: movs r2, #0
|
||||
; CHECK-BE-NEXT: bfi r2, r3, #0, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
|
||||
; CHECK-BE-NEXT: ubfx r1, r1, #12, #1
|
||||
; CHECK-BE-NEXT: bfi r2, r12, #1, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-BE-NEXT: bfi r2, r3, #2, #1
|
||||
; CHECK-BE-NEXT: bfi r2, r1, #3, #1
|
||||
; CHECK-BE-NEXT: and r1, r2, #15
|
||||
; CHECK-BE-NEXT: strb r1, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <4 x i32> %a, zeroinitializer
|
||||
|
@ -125,14 +225,66 @@ define arm_aapcs_vfpcc void @store_v8i1(<8 x i1> *%dst, <8 x i16> %a) {
|
|||
; CHECK-LE-LABEL: store_v8i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vstr p0, [r0]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: vmrs r2, p0
|
||||
; CHECK-LE-NEXT: and r3, r2, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #2, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #6, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #3, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #4, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #10, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #5, #1
|
||||
; CHECK-LE-NEXT: ubfx r3, r2, #12, #1
|
||||
; CHECK-LE-NEXT: ubfx r2, r2, #14, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r3, #6, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: bfi r1, r2, #7, #1
|
||||
; CHECK-LE-NEXT: strb r1, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_v8i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.16 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vstr p0, [r0]
|
||||
; CHECK-BE-NEXT: vmrs r2, p0
|
||||
; CHECK-BE-NEXT: ubfx r1, r2, #2, #1
|
||||
; CHECK-BE-NEXT: rsb.w r12, r1, #0
|
||||
; CHECK-BE-NEXT: and r1, r2, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r1, #0
|
||||
; CHECK-BE-NEXT: movs r1, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
|
||||
; CHECK-BE-NEXT: bfi r1, r12, #1, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r2, #6, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #3, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #4, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r2, #10, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #5, #1
|
||||
; CHECK-BE-NEXT: ubfx r3, r2, #12, #1
|
||||
; CHECK-BE-NEXT: ubfx r2, r2, #14, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r3, #6, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: bfi r1, r2, #7, #1
|
||||
; CHECK-BE-NEXT: strb r1, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <8 x i16> %a, zeroinitializer
|
||||
|
@ -144,14 +296,16 @@ define arm_aapcs_vfpcc void @store_v16i1(<16 x i1> *%dst, <16 x i8> %a) {
|
|||
; CHECK-LE-LABEL: store_v16i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vstr p0, [r0]
|
||||
; CHECK-LE-NEXT: vmrs r1, p0
|
||||
; CHECK-LE-NEXT: strh r1, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_v16i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.8 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vstr p0, [r0]
|
||||
; CHECK-BE-NEXT: vmrs r1, p0
|
||||
; CHECK-BE-NEXT: strh r1, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <16 x i8> %a, zeroinitializer
|
||||
|
|
Loading…
Reference in New Issue