forked from OSchip/llvm-project
Revert "[AArch64] Custom lower <4 x i8> loads"
This reverts commit 51e434fc25
because of a
build bot failure in test-suite::GCC-C-execute-pr60960.test that I need to
investigate.
This commit is contained in:
parent
7bcb3bd169
commit
3a7cea2858
|
@ -1131,13 +1131,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
|
||||
|
||||
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSVE()) {
|
||||
|
@ -4483,40 +4476,6 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Custom lowering for extending v4i8 vector loads.
|
||||
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
|
||||
assert(LoadNode && "Expected custom lowering of a load node");
|
||||
EVT VT = Op->getValueType(0);
|
||||
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
|
||||
|
||||
if (LoadNode->getMemoryVT() != MVT::v4i8)
|
||||
return SDValue();
|
||||
|
||||
unsigned ExtType;
|
||||
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
|
||||
ExtType = ISD::SIGN_EXTEND;
|
||||
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
|
||||
LoadNode->getExtensionType() == ISD::EXTLOAD)
|
||||
ExtType = ISD::ZERO_EXTEND;
|
||||
else
|
||||
return SDValue();
|
||||
|
||||
SDValue Load = DAG.getLoad(MVT::f32, DL, DAG.getEntryNode(),
|
||||
LoadNode->getBasePtr(), MachinePointerInfo());
|
||||
SDValue Chain = Load.getValue(1);
|
||||
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
|
||||
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
|
||||
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
|
||||
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
if (VT == MVT::v4i32)
|
||||
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
|
||||
return DAG.getMergeValues({Ext, Chain}, DL);
|
||||
}
|
||||
|
||||
// Generate SUBS and CSEL for integer abs.
|
||||
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
@ -4760,7 +4719,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||
case ISD::LOAD:
|
||||
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
|
||||
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
|
||||
return LowerLOAD(Op, DAG);
|
||||
llvm_unreachable("Unexpected request to lower ISD::LOAD");
|
||||
case ISD::ADD:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
|
||||
case ISD::AND:
|
||||
|
|
|
@ -851,7 +851,6 @@ private:
|
|||
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
|
||||
SDValue ThisVal) const;
|
||||
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
|
|
@ -86,195 +86,27 @@ define <2 x i8> @test3(<2 x i8>* %v2i8_ptr) {
|
|||
define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) {
|
||||
; CHECK-LE-LABEL: test4:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0]
|
||||
; CHECK-LE-NEXT: add x8, x0, #1 // =1
|
||||
; CHECK-LE-NEXT: ld1 { v0.b }[2], [x8]
|
||||
; CHECK-LE-NEXT: add x8, x0, #2 // =2
|
||||
; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8]
|
||||
; CHECK-LE-NEXT: add x8, x0, #3 // =3
|
||||
; CHECK-LE-NEXT: ld1 { v0.b }[6], [x8]
|
||||
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: test4:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0]
|
||||
; CHECK-BE-NEXT: add x8, x0, #1 // =1
|
||||
; CHECK-BE-NEXT: ld1 { v0.b }[2], [x8]
|
||||
; CHECK-BE-NEXT: add x8, x0, #2 // =2
|
||||
; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8]
|
||||
; CHECK-BE-NEXT: add x8, x0, #3 // =3
|
||||
; CHECK-BE-NEXT: ld1 { v0.b }[6], [x8]
|
||||
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; CHECK-BE-NEXT: ret
|
||||
%v4i8 = load <4 x i8>, <4 x i8>* %v4i8_ptr
|
||||
ret <4 x i8> %v4i8
|
||||
}
|
||||
|
||||
define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
|
||||
; CHECK-LE-LABEL: fsext_v4i32:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: fsext_v4i32:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0
|
||||
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = sext <4 x i8> %x to <4 x i32>
|
||||
ret <4 x i32> %y
|
||||
}
|
||||
|
||||
define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
|
||||
; CHECK-LE-LABEL: fzext_v4i32:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: fzext_v4i32:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
|
||||
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = zext <4 x i8> %x to <4 x i32>
|
||||
ret <4 x i32> %y
|
||||
}
|
||||
|
||||
; TODO: This codegen could just be:
|
||||
; ldrb w0, [x0]
|
||||
;
|
||||
define i32 @loadExti32(<4 x i8>* %ref) {
|
||||
; CHECK-LE-LABEL: loadExti32:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: umov w8, v0.h[0]
|
||||
; CHECK-LE-NEXT: and w0, w8, #0xff
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: loadExti32:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: umov w8, v0.h[0]
|
||||
; CHECK-BE-NEXT: and w0, w8, #0xff
|
||||
; CHECK-BE-NEXT: ret
|
||||
%a = load <4 x i8>, <4 x i8>* %ref
|
||||
%vecext = extractelement <4 x i8> %a, i32 0
|
||||
%conv = zext i8 %vecext to i32
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
|
||||
; CHECK-LE-LABEL: fsext_v4i16:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: fsext_v4i16:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = sext <4 x i8> %x to <4 x i16>
|
||||
ret <4 x i16> %y
|
||||
}
|
||||
|
||||
define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
|
||||
; CHECK-LE-LABEL: fzext_v4i16:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: fzext_v4i16:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = zext <4 x i8> %x to <4 x i16>
|
||||
ret <4 x i16> %y
|
||||
}
|
||||
|
||||
define <4 x i16> @anyext_v4i16(<4 x i8> *%a, <4 x i8> *%b) {
|
||||
; CHECK-LE-LABEL: anyext_v4i16:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ldr s1, [x1]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
|
||||
; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
|
||||
; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: anyext_v4i16:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: ldr s1, [x1]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
|
||||
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
|
||||
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
|
||||
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a, align 4
|
||||
%y = load <4 x i8>, <4 x i8>* %b, align 4
|
||||
%z = add <4 x i8> %x, %y
|
||||
%s = sext <4 x i8> %z to <4 x i16>
|
||||
ret <4 x i16> %s
|
||||
}
|
||||
|
||||
define <4 x i32> @anyext_v4i32(<4 x i8> *%a, <4 x i8> *%b) {
|
||||
; CHECK-LE-LABEL: anyext_v4i32:
|
||||
; CHECK-LE: // %bb.0:
|
||||
; CHECK-LE-NEXT: ldr s0, [x0]
|
||||
; CHECK-LE-NEXT: ldr s1, [x1]
|
||||
; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
|
||||
; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
|
||||
; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24
|
||||
; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24
|
||||
; CHECK-LE-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: anyext_v4i32:
|
||||
; CHECK-BE: // %bb.0:
|
||||
; CHECK-BE-NEXT: ldr s0, [x0]
|
||||
; CHECK-BE-NEXT: ldr s1, [x1]
|
||||
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
|
||||
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
|
||||
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
|
||||
; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
|
||||
; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
|
||||
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a, align 4
|
||||
%y = load <4 x i8>, <4 x i8>* %b, align 4
|
||||
%z = add <4 x i8> %x, %y
|
||||
%s = sext <4 x i8> %z to <4 x i32>
|
||||
ret <4 x i32> %s
|
||||
}
|
||||
|
|
|
@ -1494,12 +1494,17 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind {
|
|||
}
|
||||
|
||||
define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind {
|
||||
; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: ushll.8h v0, v0, #0
|
||||
; CHECK-NEXT: ushll.4s v0, v0, #1
|
||||
; CHECK-NEXT: ret
|
||||
;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x
|
||||
;CHECK-NOT: ushll.8h v0,
|
||||
;CHECK: ldrb w8, [x0]
|
||||
;CHECK: fmov s0, w8
|
||||
;CHECK: ldrb w8, [x0, #1]
|
||||
;CHECK: mov.s v0[1], w8
|
||||
;CHECK: ldrb w8, [x0, #2]
|
||||
;CHECK: mov.s v0[2], w8
|
||||
;CHECK: ldrb w8, [x0, #3]
|
||||
;CHECK: mov.s v0[3], w8
|
||||
;CHECK: shl.4s v0, v0, #1
|
||||
%tmp1 = load <4 x i8>, <4 x i8>* %A
|
||||
%tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
|
||||
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
|
||||
|
@ -1632,12 +1637,16 @@ define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
|
|||
}
|
||||
|
||||
define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind {
|
||||
; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: sshll.8h v0, v0, #0
|
||||
; CHECK-NEXT: sshll.4s v0, v0, #1
|
||||
; CHECK-NEXT: ret
|
||||
;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift
|
||||
;CHECK: ldrsb w8, [x0]
|
||||
;CHECK-NEXT: fmov s0, w8
|
||||
;CHECK-NEXT: ldrsb w8, [x0, #1]
|
||||
;CHECK-NEXT: mov.s v0[1], w8
|
||||
;CHECK-NEXT: ldrsb w8, [x0, #2]
|
||||
;CHECK-NEXT: mov.s v0[2], w8
|
||||
;CHECK-NEXT: ldrsb w8, [x0, #3]
|
||||
;CHECK-NEXT: mov.s v0[3], w8
|
||||
;CHECK-NEXT: shl.4s v0, v0, #1
|
||||
%tmp1 = load <4 x i8>, <4 x i8>* %A
|
||||
%tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
|
||||
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE
|
||||
|
||||
define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
|
||||
; LE-LABEL: fsext_v4i32:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: ldrsb w8, [x0]
|
||||
; LE-NEXT: ldrsb w9, [x0, #1]
|
||||
; LE-NEXT: ldrsb w10, [x0, #2]
|
||||
; LE-NEXT: ldrsb w11, [x0, #3]
|
||||
; LE-NEXT: fmov s0, w8
|
||||
; LE-NEXT: mov v0.s[1], w9
|
||||
; LE-NEXT: mov v0.s[2], w10
|
||||
; LE-NEXT: mov v0.s[3], w11
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: fsext_v4i32:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: ldrsb w8, [x0]
|
||||
; BE-NEXT: ldrsb w9, [x0, #1]
|
||||
; BE-NEXT: ldrsb w10, [x0, #2]
|
||||
; BE-NEXT: ldrsb w11, [x0, #3]
|
||||
; BE-NEXT: fmov s0, w8
|
||||
; BE-NEXT: mov v0.s[1], w9
|
||||
; BE-NEXT: mov v0.s[2], w10
|
||||
; BE-NEXT: mov v0.s[3], w11
|
||||
; BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = sext <4 x i8> %x to <4 x i32>
|
||||
ret <4 x i32> %y
|
||||
}
|
||||
|
||||
define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
|
||||
; LE-LABEL: fzext_v4i32:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: ldrb w8, [x0]
|
||||
; LE-NEXT: ldrb w9, [x0, #1]
|
||||
; LE-NEXT: ldrb w10, [x0, #2]
|
||||
; LE-NEXT: ldrb w11, [x0, #3]
|
||||
; LE-NEXT: fmov s0, w8
|
||||
; LE-NEXT: mov v0.s[1], w9
|
||||
; LE-NEXT: mov v0.s[2], w10
|
||||
; LE-NEXT: mov v0.s[3], w11
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: fzext_v4i32:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: ldrb w8, [x0]
|
||||
; BE-NEXT: ldrb w9, [x0, #1]
|
||||
; BE-NEXT: ldrb w10, [x0, #2]
|
||||
; BE-NEXT: ldrb w11, [x0, #3]
|
||||
; BE-NEXT: fmov s0, w8
|
||||
; BE-NEXT: mov v0.s[1], w9
|
||||
; BE-NEXT: mov v0.s[2], w10
|
||||
; BE-NEXT: mov v0.s[3], w11
|
||||
; BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = zext <4 x i8> %x to <4 x i32>
|
||||
ret <4 x i32> %y
|
||||
}
|
||||
|
||||
define i32 @loadExt.i32(<4 x i8>* %ref) {
|
||||
; CHECK-LABEL: loadExt.i32:
|
||||
; CHECK: ldrb
|
||||
; LE-LABEL: loadExt.i32:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: ldrb w0, [x0]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: loadExt.i32:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: ldrb w0, [x0]
|
||||
; BE-NEXT: ret
|
||||
%a = load <4 x i8>, <4 x i8>* %ref
|
||||
%vecext = extractelement <4 x i8> %a, i32 0
|
||||
%conv = zext i8 %vecext to i32
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
|
||||
; LE-LABEL: fsext_v4i16:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: ldrsb w8, [x0]
|
||||
; LE-NEXT: ldrsb w9, [x0, #1]
|
||||
; LE-NEXT: ldrsb w10, [x0, #2]
|
||||
; LE-NEXT: ldrsb w11, [x0, #3]
|
||||
; LE-NEXT: fmov s0, w8
|
||||
; LE-NEXT: mov v0.h[1], w9
|
||||
; LE-NEXT: mov v0.h[2], w10
|
||||
; LE-NEXT: mov v0.h[3], w11
|
||||
; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: fsext_v4i16:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: ldrsb w8, [x0]
|
||||
; BE-NEXT: ldrsb w9, [x0, #1]
|
||||
; BE-NEXT: ldrsb w10, [x0, #2]
|
||||
; BE-NEXT: ldrsb w11, [x0, #3]
|
||||
; BE-NEXT: fmov s0, w8
|
||||
; BE-NEXT: mov v0.h[1], w9
|
||||
; BE-NEXT: mov v0.h[2], w10
|
||||
; BE-NEXT: mov v0.h[3], w11
|
||||
; BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = sext <4 x i8> %x to <4 x i16>
|
||||
ret <4 x i16> %y
|
||||
}
|
||||
|
||||
define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
|
||||
; LE-LABEL: fzext_v4i16:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: ldrb w8, [x0]
|
||||
; LE-NEXT: ldrb w9, [x0, #1]
|
||||
; LE-NEXT: ldrb w10, [x0, #2]
|
||||
; LE-NEXT: ldrb w11, [x0, #3]
|
||||
; LE-NEXT: fmov s0, w8
|
||||
; LE-NEXT: mov v0.h[1], w9
|
||||
; LE-NEXT: mov v0.h[2], w10
|
||||
; LE-NEXT: mov v0.h[3], w11
|
||||
; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: fzext_v4i16:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: ldrb w8, [x0]
|
||||
; BE-NEXT: ldrb w9, [x0, #1]
|
||||
; BE-NEXT: ldrb w10, [x0, #2]
|
||||
; BE-NEXT: ldrb w11, [x0, #3]
|
||||
; BE-NEXT: fmov s0, w8
|
||||
; BE-NEXT: mov v0.h[1], w9
|
||||
; BE-NEXT: mov v0.h[2], w10
|
||||
; BE-NEXT: mov v0.h[3], w11
|
||||
; BE-NEXT: rev64 v0.4h, v0.4h
|
||||
; BE-NEXT: ret
|
||||
%x = load <4 x i8>, <4 x i8>* %a
|
||||
%y = zext <4 x i8> %x to <4 x i16>
|
||||
ret <4 x i16> %y
|
||||
}
|
|
@ -112,10 +112,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
||||
; CHECK-LABEL: v4i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: ldr s1, [x1]
|
||||
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: ldrsb w8, [x0]
|
||||
; CHECK-NEXT: ldrsb w9, [x1]
|
||||
; CHECK-NEXT: ldrsb w10, [x0, #1]
|
||||
; CHECK-NEXT: ldrsb w11, [x1, #1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: ldrsb w8, [x0, #2]
|
||||
; CHECK-NEXT: ldrsb w9, [x1, #2]
|
||||
; CHECK-NEXT: mov v0.h[1], w10
|
||||
; CHECK-NEXT: mov v1.h[1], w11
|
||||
; CHECK-NEXT: ldrsb w10, [x0, #3]
|
||||
; CHECK-NEXT: ldrsb w11, [x1, #3]
|
||||
; CHECK-NEXT: mov v0.h[2], w8
|
||||
; CHECK-NEXT: mov v1.h[2], w9
|
||||
; CHECK-NEXT: mov v0.h[3], w10
|
||||
; CHECK-NEXT: mov v1.h[3], w11
|
||||
; CHECK-NEXT: shl v1.4h, v1.4h, #8
|
||||
; CHECK-NEXT: shl v0.4h, v0.4h, #8
|
||||
; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
|
||||
|
|
|
@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
||||
; CHECK-LABEL: v4i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: ldr s1, [x1]
|
||||
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: ldrsb w8, [x0]
|
||||
; CHECK-NEXT: ldrsb w9, [x1]
|
||||
; CHECK-NEXT: ldrsb w10, [x0, #1]
|
||||
; CHECK-NEXT: ldrsb w11, [x1, #1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: ldrsb w8, [x0, #2]
|
||||
; CHECK-NEXT: ldrsb w9, [x1, #2]
|
||||
; CHECK-NEXT: mov v0.h[1], w10
|
||||
; CHECK-NEXT: mov v1.h[1], w11
|
||||
; CHECK-NEXT: ldrsb w10, [x0, #3]
|
||||
; CHECK-NEXT: ldrsb w11, [x1, #3]
|
||||
; CHECK-NEXT: mov v0.h[2], w8
|
||||
; CHECK-NEXT: mov v1.h[2], w9
|
||||
; CHECK-NEXT: mov v0.h[3], w10
|
||||
; CHECK-NEXT: mov v1.h[3], w11
|
||||
; CHECK-NEXT: shl v1.4h, v1.4h, #8
|
||||
; CHECK-NEXT: shl v0.4h, v0.4h, #8
|
||||
; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
|
||||
|
|
|
@ -112,11 +112,23 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
||||
; CHECK-LABEL: v4i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: ldr s1, [x1]
|
||||
; CHECK-NEXT: ldrb w8, [x0]
|
||||
; CHECK-NEXT: ldrb w9, [x1]
|
||||
; CHECK-NEXT: ldrb w10, [x0, #1]
|
||||
; CHECK-NEXT: ldrb w11, [x1, #1]
|
||||
; CHECK-NEXT: ldrb w12, [x0, #2]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: ldrb w8, [x1, #2]
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: mov v0.h[1], w10
|
||||
; CHECK-NEXT: ldrb w9, [x0, #3]
|
||||
; CHECK-NEXT: ldrb w10, [x1, #3]
|
||||
; CHECK-NEXT: mov v1.h[1], w11
|
||||
; CHECK-NEXT: mov v0.h[2], w12
|
||||
; CHECK-NEXT: mov v1.h[2], w8
|
||||
; CHECK-NEXT: mov v0.h[3], w9
|
||||
; CHECK-NEXT: mov v1.h[3], w10
|
||||
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
|
||||
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
|
||||
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
|
||||
; CHECK-NEXT: xtn v0.8b, v0.8h
|
||||
|
|
|
@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
||||
; CHECK-LABEL: v4i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr s0, [x0]
|
||||
; CHECK-NEXT: ldr s1, [x1]
|
||||
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: ldrb w8, [x0]
|
||||
; CHECK-NEXT: ldrb w9, [x1]
|
||||
; CHECK-NEXT: ldrb w10, [x0, #1]
|
||||
; CHECK-NEXT: ldrb w11, [x1, #1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: ldrb w8, [x0, #2]
|
||||
; CHECK-NEXT: ldrb w9, [x1, #2]
|
||||
; CHECK-NEXT: mov v0.h[1], w10
|
||||
; CHECK-NEXT: mov v1.h[1], w11
|
||||
; CHECK-NEXT: ldrb w10, [x0, #3]
|
||||
; CHECK-NEXT: ldrb w11, [x1, #3]
|
||||
; CHECK-NEXT: mov v0.h[2], w8
|
||||
; CHECK-NEXT: mov v1.h[2], w9
|
||||
; CHECK-NEXT: mov v0.h[3], w10
|
||||
; CHECK-NEXT: mov v1.h[3], w11
|
||||
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
|
||||
; CHECK-NEXT: xtn v0.8b, v0.8h
|
||||
; CHECK-NEXT: str s0, [x2]
|
||||
|
|
Loading…
Reference in New Issue