forked from OSchip/llvm-project
DAG: Avoid OOB when legalizing vector indexing
If a vector index is out of bounds, the result is supposed to be an undefined value, but the operation itself is not undefined behavior. Change the legalization for indexing the vector on the stack so that an out-of-bounds index does not create an out-of-bounds memory access. llvm-svn: 291604
This commit is contained in:
parent
7acb42a41a
commit
0b382a7cb8
|
@ -3118,6 +3118,13 @@ public:
|
|||
EVT DataVT, SelectionDAG &DAG,
|
||||
bool IsCompressedMemory) const;
|
||||
|
||||
/// Get a pointer to vector element \p Idx located in memory for a vector of
|
||||
/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
|
||||
/// bounds the returned pointer is unspecified, but will be within the vector
|
||||
/// bounds.
|
||||
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
|
||||
SDValue Idx) const;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instruction Emitting Hooks
|
||||
//
|
||||
|
|
|
@ -330,8 +330,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
|
|||
// supported by the target.
|
||||
EVT VT = Tmp1.getValueType();
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
EVT IdxVT = Tmp3.getValueType();
|
||||
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
|
||||
SDValue StackPtr = DAG.CreateStackTemporary(VT);
|
||||
|
||||
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
|
||||
|
@ -341,13 +339,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
|
|||
DAG.getEntryNode(), dl, Tmp1, StackPtr,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
|
||||
|
||||
// Truncate or zero extend offset to target pointer type.
|
||||
Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
|
||||
// Add the offset to the index.
|
||||
unsigned EltSize = EltVT.getSizeInBits()/8;
|
||||
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
|
||||
DAG.getConstant(EltSize, dl, IdxVT));
|
||||
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
|
||||
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
|
||||
|
||||
// Store the scalar value.
|
||||
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
|
||||
// Load the updated vector.
|
||||
|
@ -1209,20 +1202,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
|
|||
}
|
||||
}
|
||||
|
||||
EVT VecVT = Vec.getValueType();
|
||||
|
||||
if (!Ch.getNode()) {
|
||||
// Store the value to a temporary stack slot, then LOAD the returned part.
|
||||
StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
|
||||
StackPtr = DAG.CreateStackTemporary(VecVT);
|
||||
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
|
||||
MachinePointerInfo());
|
||||
}
|
||||
|
||||
// Add the offset to the index.
|
||||
unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;
|
||||
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
|
||||
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
|
||||
|
||||
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
|
||||
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
|
||||
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
|
||||
|
||||
SDValue NewLoad;
|
||||
|
||||
|
@ -1232,7 +1221,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
|
|||
else
|
||||
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
|
||||
MachinePointerInfo(),
|
||||
Vec.getValueType().getVectorElementType());
|
||||
VecVT.getVectorElementType());
|
||||
|
||||
// Replace the chain going out of the store, by the one out of the load.
|
||||
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
|
||||
|
@ -1256,8 +1245,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
|
|||
SDLoc dl(Op);
|
||||
|
||||
// Store the value to a temporary stack slot, then LOAD the returned part.
|
||||
|
||||
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
|
||||
EVT VecVT = Vec.getValueType();
|
||||
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
|
||||
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
|
||||
MachinePointerInfo PtrInfo =
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
|
||||
|
@ -1266,16 +1255,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
|
|||
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
|
||||
|
||||
// Then store the inserted part.
|
||||
|
||||
// Add the offset to the index.
|
||||
unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;
|
||||
|
||||
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
|
||||
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
|
||||
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
|
||||
|
||||
SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
|
||||
StackPtr);
|
||||
SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
|
||||
|
||||
// Store the subvector.
|
||||
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
|
||||
|
|
|
@ -1021,22 +1021,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
|
|||
DAG.getIntPtrConstant(1, dl));
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
|
||||
SDValue Index) {
|
||||
SDLoc dl(Index);
|
||||
// Make sure the index type is big enough to compute in.
|
||||
Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout()));
|
||||
|
||||
// Calculate the element offset and add it to the pointer.
|
||||
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
|
||||
assert(EltSize * 8 == EltVT.getSizeInBits() &&
|
||||
"Converting bits to bytes lost precision");
|
||||
|
||||
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
|
||||
DAG.getConstant(EltSize, dl, Index.getValueType()));
|
||||
return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
|
||||
}
|
||||
|
||||
/// Build an integer with low bits Lo and high bits Hi.
|
||||
SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
|
||||
// Arbitrarily use dlHi for result SDLoc
|
||||
|
|
|
@ -173,7 +173,6 @@ private:
|
|||
/// input operand is returned.
|
||||
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
|
||||
|
||||
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
|
||||
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
|
||||
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
|
||||
|
||||
|
|
|
@ -846,7 +846,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
|
|||
GetSplitVector(Vec, Lo, Hi);
|
||||
|
||||
EVT VecVT = Vec.getValueType();
|
||||
EVT VecElemVT = VecVT.getVectorElementType();
|
||||
unsigned VecElems = VecVT.getVectorNumElements();
|
||||
unsigned SubElems = SubVec.getValueType().getVectorNumElements();
|
||||
|
||||
|
@ -872,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
|
|||
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
|
||||
|
||||
// Store the new subvector into the specified index.
|
||||
SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
|
||||
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
|
||||
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
|
||||
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
|
||||
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
|
||||
|
@ -1003,7 +1002,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
|
|||
|
||||
// Store the new element. This may be larger than the vector element type,
|
||||
// so use a truncating store.
|
||||
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
|
||||
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
|
||||
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
|
||||
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
|
||||
Store =
|
||||
|
@ -1650,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
|
|||
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
|
||||
|
||||
// Load back the required element.
|
||||
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
|
||||
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
|
||||
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
|
||||
MachinePointerInfo(), EltVT);
|
||||
}
|
||||
|
|
|
@ -3706,7 +3706,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
|
|||
return Result;
|
||||
}
|
||||
|
||||
SDValue
|
||||
SDValue
|
||||
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
|
||||
const SDLoc &DL, EVT DataVT,
|
||||
SelectionDAG &DAG,
|
||||
|
@ -3738,6 +3738,49 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
|
|||
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
|
||||
}
|
||||
|
||||
/// Restrict a non-constant element index \p Idx so that it selects an element
/// inside a vector of type \p VecVT. The returned node has the same value type
/// as \p Idx. For a power-of-two element count the index is masked; otherwise
/// it is limited with an unsigned minimum against the last element index.
/// NOTE(review): a constant \p Idx is returned unmodified, even if it is out
/// of range -- confirm callers never pass an out-of-range constant.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
|
||||
SDValue Idx,
|
||||
EVT VecVT,
|
||||
const SDLoc &dl) {
|
||||
// Constant indices are passed through without emitting any clamp node.
if (isa<ConstantSDNode>(Idx))
|
||||
return Idx;
|
||||
|
||||
EVT IdxVT = Idx.getValueType();
|
||||
unsigned NElts = VecVT.getVectorNumElements();
|
||||
// Power-of-two element count: AND with a mask of the low log2(NElts) bits.
if (isPowerOf2_32(NElts)) {
|
||||
APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
|
||||
Log2_32(NElts));
|
||||
return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
|
||||
DAG.getConstant(Imm, dl, IdxVT));
|
||||
}
|
||||
|
||||
// Any other element count: unsigned min with the last valid index.
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
|
||||
DAG.getConstant(NElts - 1, dl, IdxVT));
|
||||
}
|
||||
|
||||
/// Compute the address of element \p Index of a vector of type \p VecVT whose
/// storage begins at \p VecPtr. The index is converted to the pointer type,
/// clamped with clampDynamicVectorIndex, scaled by the element size in bytes
/// and added to \p VecPtr.
/// NOTE(review): the clamp bounds only the address of a single element. A
/// caller that stores a whole subvector at the returned pointer (as the
/// SplitVecRes_INSERT_SUBVECTOR hunk in this change appears to do) is
/// presumably not kept fully in bounds by this alone -- confirm.
SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
|
||||
SDValue VecPtr, EVT VecVT,
|
||||
SDValue Index) const {
|
||||
SDLoc dl(Index);
|
||||
// Make sure the index type is big enough to compute in.
|
||||
Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));
|
||||
|
||||
EVT EltVT = VecVT.getVectorElementType();
|
||||
|
||||
// Calculate the element offset and add it to the pointer.
|
||||
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
|
||||
// The bit size must be a whole number of bytes, or the division above
// would silently drop bits.
assert(EltSize * 8 == EltVT.getSizeInBits() &&
|
||||
"Converting bits to bytes lost precision");
|
||||
|
||||
// Clamp the element index (not the byte offset) before scaling it.
Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
|
||||
|
||||
EVT IdxVT = Index.getValueType();
|
||||
|
||||
// Byte offset = clamped index * element size; result = offset + base.
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
|
||||
DAG.getConstant(EltSize, dl, IdxVT));
|
||||
return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Implementation of Emulated TLS Model
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -904,8 +904,9 @@ define <8 x i8> @getl(<16 x i8> %x) #0 {
|
|||
|
||||
; CHECK-LABEL: test_extracts_inserts_varidx_extract:
|
||||
; CHECK: str q0
|
||||
; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
|
||||
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
|
||||
; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
|
||||
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
|
||||
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], {{\[}}[[PTR]]{{\]}}
|
||||
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
|
||||
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
|
||||
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
|
||||
|
@ -922,7 +923,9 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: test_extracts_inserts_varidx_insert:
|
||||
; CHECK: str h0, [{{.*}}, w0, sxtw #1]
|
||||
; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
|
||||
; CHECK: bfi x9, [[MASKED_IDX]], #1, #2
|
||||
; CHECK: st1 { v0.h }[0], [x9]
|
||||
; CHECK-DAG: ldr d[[R:[0-9]+]]
|
||||
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
|
||||
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: _test:
|
||||
; CHECK: fmov.2d v0, #2.00000000
|
||||
; CHECK: str q0, [sp, #-16]!
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK: ldr s0, [x8, w1, sxtw #2]
|
||||
; CHECK-DAG: fmov.2d v0, #2.00000000
|
||||
; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3
|
||||
; CHECK-DAG: mov x9, sp
|
||||
; CHECK-DAG: str q0, [sp], #16
|
||||
; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2
|
||||
; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}}
|
||||
; CHECK: str s0, [x0]
|
||||
|
||||
define void @test(float * %p1, i32 %v1) {
|
||||
|
@ -16,9 +18,11 @@ entry:
|
|||
|
||||
; CHECK-LABEL: _test2
|
||||
; CHECK: movi.16b v0, #63
|
||||
; CHECK: str q0, [sp, #-16]!
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK: ldr s0, [x8, w1, sxtw #2]
|
||||
; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3
|
||||
; CHECK-DAG: str q0, [sp], #16
|
||||
; CHECK-DAG: mov x9, sp
|
||||
; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2
|
||||
; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}}
|
||||
; CHECK: str s0, [x0]
|
||||
|
||||
define void @test2(float * %p1, i32 %v1) {
|
||||
|
|
|
@ -207,11 +207,15 @@ define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16>
|
|||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: s_and_b32 [[MASK_IDX:s[0-9]+]], s{{[0-9]+}}, 3{{$}}
|
||||
; GCN-DAG: v_or_b32_e32 [[IDX:v[0-9]+]], [[MASK_IDX]], [[BASE_FI]]{{$}}
|
||||
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GCN: buffer_store_short v{{[0-9]+}}, [[IDX]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: s_waitcnt
|
||||
|
||||
|
|
|
@ -7,11 +7,14 @@
|
|||
;
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
|
||||
; CHECK-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200
|
||||
; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
|
||||
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
|
||||
; CHECK-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
|
||||
|
||||
; TODO: add 0?
|
||||
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
|
||||
; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]]
|
||||
; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]]
|
||||
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
|
|
|
@ -825,7 +825,7 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
|
|||
; CHECK-ALL: strh
|
||||
; CHECK-ALL: mov
|
||||
; CHECK-ALL-DAG: ldrh
|
||||
; CHECK-ALL-DAG: add
|
||||
; CHECK-ALL-DAG: orr
|
||||
; CHECK-ALL: strh
|
||||
; CHECK-ALL: ldrh
|
||||
; CHECK-ALL: strh
|
||||
|
@ -855,7 +855,7 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
|
|||
; CHECK-VFP: orr
|
||||
; CHECK-VFP: str
|
||||
; CHECK-VFP: mov
|
||||
; CHECK-VFP: add
|
||||
; CHECK-VFP: orr
|
||||
; CHECK-VFP: ldrh
|
||||
; CHECK-VFP: strh
|
||||
; CHECK-VFP: add sp, sp, #8
|
||||
|
|
|
@ -373,7 +373,8 @@ define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
|
|||
; CHECK: mov r[[FP:[0-9]+]], sp
|
||||
; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4]
|
||||
; CHECK: mov r[[SPCOPY:[0-9]+]], sp
|
||||
; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]]
|
||||
; CHECK: and r[[MASKED_IDX:[0-9]+]], r[[IDX]], #15
|
||||
; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[MASKED_IDX]]
|
||||
; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]]
|
||||
%x = extractelement <16 x i8> %v, i32 %idx
|
||||
%1 = insertelement <8 x i8> undef, i8 %x, i32 0
|
||||
|
|
|
@ -14,6 +14,7 @@ define i1 @via_stack_bug(i8 signext %idx) {
|
|||
; ALL-DAG: addiu [[ONE:\$[0-9]+]], $zero, 1
|
||||
; ALL-DAG: sb [[ONE]], 7($sp)
|
||||
; ALL-DAG: sb $zero, 6($sp)
|
||||
; ALL-DAG: andi [[MASKED_IDX:\$[0-9]+]], $4, 1
|
||||
; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6
|
||||
; ALL-DAG: addu [[EPTR:\$[0-9]+]], $4, [[VPTR]]
|
||||
; ALL-DAG: or [[EPTR:\$[0-9]+]], [[MASKED_IDX]], [[VPTR]]
|
||||
; ALL: lbu $2, 0([[EPTR]])
|
||||
|
|
|
@ -23,7 +23,7 @@ entry:
|
|||
; CHECK: mfvsrd [[TOGPR:[0-9]+]],
|
||||
; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
|
||||
; CHECK: extsw 3, [[RSHREG]]
|
||||
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
|
||||
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
|
||||
; CHECK-P7-DAG: stxvw4x 34,
|
||||
; CHECK-P7: lwax 3, [[ELEMOFFREG]],
|
||||
; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
|
||||
|
@ -52,7 +52,7 @@ entry:
|
|||
; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
|
||||
; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
|
||||
; CHECK: mfvsrd 3,
|
||||
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3
|
||||
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 3, 28, 28
|
||||
; CHECK-P7-DAG: stxvd2x 34,
|
||||
; CHECK-P7: ldx 3, [[ELEMOFFREG]],
|
||||
; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
|
||||
|
@ -75,7 +75,7 @@ entry:
|
|||
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]]
|
||||
; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
|
||||
; CHECK: xscvspdpn 1,
|
||||
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
|
||||
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
|
||||
; CHECK-P7-DAG: stxvw4x 34,
|
||||
; CHECK-P7: lfsx 1, [[ELEMOFFREG]],
|
||||
; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2
|
||||
|
|
|
@ -404,6 +404,7 @@ define i64 @extractelement_v4i64_3(<4 x i64> %a, i256 %i) nounwind {
|
|||
define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind {
|
||||
; SSE-LABEL: extractelement_v16i8_var:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: andl $15, %edi
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movb (%rdi,%rax), %al
|
||||
|
@ -411,6 +412,7 @@ define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind {
|
|||
;
|
||||
; AVX-LABEL: extractelement_v16i8_var:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: andl $15, %edi
|
||||
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
||||
; AVX-NEXT: movb (%rdi,%rax), %al
|
||||
|
@ -426,6 +428,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
|
|||
; SSE-NEXT: movq %rsp, %rbp
|
||||
; SSE-NEXT: andq $-32, %rsp
|
||||
; SSE-NEXT: subq $64, %rsp
|
||||
; SSE-NEXT: andl $31, %edi
|
||||
; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, (%rsp)
|
||||
; SSE-NEXT: movq %rsp, %rax
|
||||
|
@ -440,6 +443,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
|
|||
; AVX-NEXT: movq %rsp, %rbp
|
||||
; AVX-NEXT: andq $-32, %rsp
|
||||
; AVX-NEXT: subq $64, %rsp
|
||||
; AVX-NEXT: andl $31, %edi
|
||||
; AVX-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX-NEXT: movq %rsp, %rax
|
||||
; AVX-NEXT: movb (%rdi,%rax), %al
|
||||
|
@ -454,12 +458,14 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
|
|||
define i16 @extractelement_v8i16_var(<8 x i16> %a, i256 %i) nounwind {
|
||||
; SSE-LABEL: extractelement_v8i16_var:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: andl $7, %edi
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movzwl -24(%rsp,%rdi,2), %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extractelement_v8i16_var:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: andl $7, %edi
|
||||
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX-NEXT: movzwl -24(%rsp,%rdi,2), %eax
|
||||
; AVX-NEXT: retq
|
||||
|
@ -474,6 +480,7 @@ define i16 @extractelement_v16i16_var(<16 x i16> %a, i256 %i) nounwind {
|
|||
; SSE-NEXT: movq %rsp, %rbp
|
||||
; SSE-NEXT: andq $-32, %rsp
|
||||
; SSE-NEXT: subq $64, %rsp
|
||||
; SSE-NEXT: andl $15, %edi
|
||||
; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, (%rsp)
|
||||
; SSE-NEXT: movzwl (%rsp,%rdi,2), %eax
|
||||
|
@ -487,6 +494,7 @@ define i16 @extractelement_v16i16_var(<16 x i16> %a, i256 %i) nounwind {
|
|||
; AVX-NEXT: movq %rsp, %rbp
|
||||
; AVX-NEXT: andq $-32, %rsp
|
||||
; AVX-NEXT: subq $64, %rsp
|
||||
; AVX-NEXT: andl $15, %edi
|
||||
; AVX-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX-NEXT: movzwl (%rsp,%rdi,2), %eax
|
||||
; AVX-NEXT: movq %rbp, %rsp
|
||||
|
@ -500,12 +508,14 @@ define i16 @extractelement_v16i16_var(<16 x i16> %a, i256 %i) nounwind {
|
|||
define i32 @extractelement_v4i32_var(<4 x i32> %a, i256 %i) nounwind {
|
||||
; SSE-LABEL: extractelement_v4i32_var:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: andl $3, %edi
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movl -24(%rsp,%rdi,4), %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extractelement_v4i32_var:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: andl $3, %edi
|
||||
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX-NEXT: movl -24(%rsp,%rdi,4), %eax
|
||||
; AVX-NEXT: retq
|
||||
|
@ -520,6 +530,7 @@ define i32 @extractelement_v8i32_var(<8 x i32> %a, i256 %i) nounwind {
|
|||
; SSE-NEXT: movq %rsp, %rbp
|
||||
; SSE-NEXT: andq $-32, %rsp
|
||||
; SSE-NEXT: subq $64, %rsp
|
||||
; SSE-NEXT: andl $7, %edi
|
||||
; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, (%rsp)
|
||||
; SSE-NEXT: movl (%rsp,%rdi,4), %eax
|
||||
|
@ -533,6 +544,7 @@ define i32 @extractelement_v8i32_var(<8 x i32> %a, i256 %i) nounwind {
|
|||
; AVX1-NEXT: movq %rsp, %rbp
|
||||
; AVX1-NEXT: andq $-32, %rsp
|
||||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: andl $7, %edi
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: movl (%rsp,%rdi,4), %eax
|
||||
; AVX1-NEXT: movq %rbp, %rsp
|
||||
|
@ -554,12 +566,14 @@ define i32 @extractelement_v8i32_var(<8 x i32> %a, i256 %i) nounwind {
|
|||
define i64 @extractelement_v2i64_var(<2 x i64> %a, i256 %i) nounwind {
|
||||
; SSE-LABEL: extractelement_v2i64_var:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: andl $1, %edi
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq -24(%rsp,%rdi,8), %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extractelement_v2i64_var:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: andl $1, %edi
|
||||
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX-NEXT: movq -24(%rsp,%rdi,8), %rax
|
||||
; AVX-NEXT: retq
|
||||
|
@ -574,6 +588,7 @@ define i64 @extractelement_v4i64_var(<4 x i64> %a, i256 %i) nounwind {
|
|||
; SSE-NEXT: movq %rsp, %rbp
|
||||
; SSE-NEXT: andq $-32, %rsp
|
||||
; SSE-NEXT: subq $64, %rsp
|
||||
; SSE-NEXT: andl $3, %edi
|
||||
; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, (%rsp)
|
||||
; SSE-NEXT: movq (%rsp,%rdi,8), %rax
|
||||
|
@ -587,6 +602,7 @@ define i64 @extractelement_v4i64_var(<4 x i64> %a, i256 %i) nounwind {
|
|||
; AVX-NEXT: movq %rsp, %rbp
|
||||
; AVX-NEXT: andq $-32, %rsp
|
||||
; AVX-NEXT: subq $64, %rsp
|
||||
; AVX-NEXT: andl $3, %edi
|
||||
; AVX-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX-NEXT: movq (%rsp,%rdi,8), %rax
|
||||
; AVX-NEXT: movq %rbp, %rsp
|
||||
|
|
|
@ -16,11 +16,11 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
|||
; CHECK-NEXT: movl 20(%esp), %edx
|
||||
; CHECK-NEXT: paddd (%edx), %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, (%edx)
|
||||
; CHECK-NEXT: shll $4, %ecx
|
||||
; CHECK-NEXT: movl (%ecx,%edx), %esi
|
||||
; CHECK-NEXT: movl 12(%ecx,%edx), %edi
|
||||
; CHECK-NEXT: movl 8(%ecx,%edx), %ebx
|
||||
; CHECK-NEXT: movl 4(%ecx,%edx), %edx
|
||||
; CHECK-NEXT: movl (%edx), %esi
|
||||
; CHECK-NEXT: movl 12(%edx), %edi
|
||||
; CHECK-NEXT: movl 8(%edx), %ebx
|
||||
; CHECK-NEXT: movl 4(%edx), %edx
|
||||
; CHECK-NEXT: shll $4, %ecx
|
||||
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
|
||||
; CHECK-NEXT: movl %edx, (%eax,%ecx)
|
||||
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
|
||||
|
|
|
@ -68,9 +68,10 @@ define void @store_i64_from_vector256(<16 x i16> %x, <16 x i16> %y, i64* %i) {
|
|||
|
||||
define void @PR23476(<5 x i64> %in, i64* %out, i32 %index) {
|
||||
; X32-LABEL: PR23476:
|
||||
; X32: andl $7, %eax
|
||||
; X32: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: movsd %xmm0, (%eax)
|
||||
; X32-NEXT: movsd %xmm0, (%ecx)
|
||||
%ext = extractelement <5 x i64> %in, i32 %index
|
||||
store i64 %ext, i64* %out, align 8
|
||||
ret void
|
||||
|
|
|
@ -12,6 +12,7 @@ define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movaps %xmm0, (%esp)
|
||||
; X32-NEXT: movl $76, (%esp,%eax,4)
|
||||
; X32-NEXT: movl (%esp), %eax
|
||||
|
@ -21,9 +22,10 @@ define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
;
|
||||
; X64-LABEL: t0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: movl $76, -24(%rsp,%rax,4)
|
||||
; X64-NEXT: andl $3, %edi
|
||||
; X64-NEXT: movl $76, -24(%rsp,%rdi,4)
|
||||
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: retq
|
||||
%t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7
|
||||
|
@ -38,6 +40,7 @@ define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movl $76, %ecx
|
||||
; X32-NEXT: pinsrd $0, %ecx, %xmm0
|
||||
; X32-NEXT: movdqa %xmm0, (%esp)
|
||||
|
@ -48,11 +51,12 @@ define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
;
|
||||
; X64-LABEL: t1:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movl $76, %eax
|
||||
; X64-NEXT: pinsrd $0, %eax, %xmm0
|
||||
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: movl -24(%rsp,%rax,4), %eax
|
||||
; X64-NEXT: andl $3, %edi
|
||||
; X64-NEXT: movl -24(%rsp,%rdi,4), %eax
|
||||
; X64-NEXT: retq
|
||||
%t13 = insertelement <4 x i32> %t8, i32 76, i32 0
|
||||
%t9 = extractelement <4 x i32> %t13, i32 %t7
|
||||
|
@ -66,6 +70,7 @@ define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movdqa %xmm0, (%esp)
|
||||
; X32-NEXT: pinsrd $0, (%esp,%eax,4), %xmm0
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
|
@ -74,9 +79,10 @@ define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
;
|
||||
; X64-LABEL: t2:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: pinsrd $0, -24(%rsp,%rax,4), %xmm0
|
||||
; X64-NEXT: andl $3, %edi
|
||||
; X64-NEXT: pinsrd $0, -24(%rsp,%rdi,4), %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t9 = extractelement <4 x i32> %t8, i32 %t7
|
||||
%t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0
|
||||
|
@ -90,6 +96,7 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movaps %xmm0, (%esp)
|
||||
; X32-NEXT: movss %xmm0, (%esp,%eax,4)
|
||||
; X32-NEXT: movaps (%esp), %xmm0
|
||||
|
@ -99,9 +106,10 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
|
|||
;
|
||||
; X64-LABEL: t3:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: movss %xmm0, -24(%rsp,%rax,4)
|
||||
; X64-NEXT: andl $3, %edi
|
||||
; X64-NEXT: movss %xmm0, -24(%rsp,%rdi,4)
|
||||
; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t9 = extractelement <4 x i32> %t8, i32 0
|
||||
|
|
|
@ -10,6 +10,7 @@ define <8 x float> @f(<8 x float> %a, i32 %b) nounwind {
|
|||
; X32-NEXT: andl $-32, %esp
|
||||
; X32-NEXT: subl $64, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: andl $7, %eax
|
||||
; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movaps %xmm0, (%esp)
|
||||
; X32-NEXT: movl $1084227584, (%esp,%eax,4) ## imm = 0x40A00000
|
||||
|
@ -25,10 +26,11 @@ define <8 x float> @f(<8 x float> %a, i32 %b) nounwind {
|
|||
; X64-NEXT: movq %rsp, %rbp
|
||||
; X64-NEXT: andq $-32, %rsp
|
||||
; X64-NEXT: subq $64, %rsp
|
||||
; X64-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movaps %xmm0, (%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: movl $1084227584, (%rsp,%rax,4) ## imm = 0x40A00000
|
||||
; X64-NEXT: andl $7, %edi
|
||||
; X64-NEXT: movl $1084227584, (%rsp,%rdi,4) ## imm = 0x40A00000
|
||||
; X64-NEXT: movaps (%rsp), %xmm0
|
||||
; X64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
|
||||
; X64-NEXT: movq %rbp, %rsp
|
||||
|
|
|
@ -11,10 +11,11 @@ define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
|
|||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: movl 12(%ebp), %ecx
|
||||
; X32-NEXT: movl 12(%ebp), %eax
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movl 8(%ebp), %ecx
|
||||
; X32-NEXT: movaps %xmm0, (%esp)
|
||||
; X32-NEXT: movl %eax, (%esp,%ecx,4)
|
||||
; X32-NEXT: movl %ecx, (%esp,%eax,4)
|
||||
; X32-NEXT: movaps (%esp), %xmm0
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
|
@ -22,9 +23,10 @@ define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
|
|||
;
|
||||
; X64-LABEL: var_insert:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %esi, %rax
|
||||
; X64-NEXT: movl %edi, -24(%rsp,%rax,4)
|
||||
; X64-NEXT: andl $3, %esi
|
||||
; X64-NEXT: movl %edi, -24(%rsp,%rsi,4)
|
||||
; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -40,6 +42,7 @@ define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind {
|
|||
; X32-NEXT: andl $-16, %esp
|
||||
; X32-NEXT: subl $32, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: andl $3, %eax
|
||||
; X32-NEXT: movaps %xmm0, (%esp)
|
||||
; X32-NEXT: movl (%esp,%eax,4), %eax
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
|
@ -48,9 +51,10 @@ define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind {
|
|||
;
|
||||
; X64-LABEL: var_extract:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: movl -24(%rsp,%rax,4), %eax
|
||||
; X64-NEXT: andl $3, %edi
|
||||
; X64-NEXT: movl -24(%rsp,%rdi,4), %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp3 = extractelement <4 x i32> %x, i32 %idx
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,6 +13,10 @@ define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0,
|
|||
; ALL-NEXT: movq %rsp, %rbp
|
||||
; ALL-NEXT: andq $-32, %rsp
|
||||
; ALL-NEXT: subq $64, %rsp
|
||||
; ALL-NEXT: andl $3, %ecx
|
||||
; ALL-NEXT: andl $3, %edx
|
||||
; ALL-NEXT: andl $3, %esi
|
||||
; ALL-NEXT: andl $3, %edi
|
||||
; ALL-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
|
@ -40,6 +44,8 @@ define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0,
|
|||
; ALL-NEXT: movq %rsp, %rbp
|
||||
; ALL-NEXT: andq $-32, %rsp
|
||||
; ALL-NEXT: subq $64, %rsp
|
||||
; ALL-NEXT: andl $3, %edx
|
||||
; ALL-NEXT: andl $3, %esi
|
||||
; ALL-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
|
@ -62,6 +68,10 @@ define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0,
|
|||
define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
|
||||
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: andl $1, %ecx
|
||||
; ALL-NEXT: andl $1, %edx
|
||||
; ALL-NEXT: andl $1, %esi
|
||||
; ALL-NEXT: andl $1, %edi
|
||||
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
|
@ -87,6 +97,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i
|
|||
; AVX1-NEXT: movq %rsp, %rbp
|
||||
; AVX1-NEXT: andq $-32, %rsp
|
||||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: andl $3, %ecx
|
||||
; AVX1-NEXT: andl $3, %edx
|
||||
; AVX1-NEXT: andl $3, %esi
|
||||
; AVX1-NEXT: andl $3, %edi
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -105,6 +119,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i
|
|||
; AVX2-NEXT: movq %rsp, %rbp
|
||||
; AVX2-NEXT: andq $-32, %rsp
|
||||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: andl $3, %ecx
|
||||
; AVX2-NEXT: andl $3, %edx
|
||||
; AVX2-NEXT: andl $3, %esi
|
||||
; AVX2-NEXT: andl $3, %edi
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -134,6 +152,8 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
|
|||
; AVX1-NEXT: movq %rsp, %rbp
|
||||
; AVX1-NEXT: andq $-32, %rsp
|
||||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: andl $3, %esi
|
||||
; AVX1-NEXT: andl $3, %edi
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -150,6 +170,8 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
|
|||
; AVX2-NEXT: movq %rsp, %rbp
|
||||
; AVX2-NEXT: andq $-32, %rsp
|
||||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: andl $3, %esi
|
||||
; AVX2-NEXT: andl $3, %edi
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -173,6 +195,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
|
|||
define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
|
||||
; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: andl $1, %ecx
|
||||
; AVX1-NEXT: andl $1, %edx
|
||||
; AVX1-NEXT: andl $1, %esi
|
||||
; AVX1-NEXT: andl $1, %edi
|
||||
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -185,6 +211,10 @@ define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: andl $1, %esi
|
||||
; AVX2-NEXT: andl $1, %edi
|
||||
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -212,15 +242,23 @@ define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0
|
|||
; AVX1-NEXT: movq %rsp, %rbp
|
||||
; AVX1-NEXT: andq $-32, %rsp
|
||||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: movslq %edi, %rax
|
||||
; AVX1-NEXT: movslq %esi, %rsi
|
||||
; AVX1-NEXT: movslq %edx, %rdx
|
||||
; AVX1-NEXT: movslq %ecx, %r11
|
||||
; AVX1-NEXT: movslq %r8d, %r10
|
||||
; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; AVX1-NEXT: andl $7, %edi
|
||||
; AVX1-NEXT: andl $7, %esi
|
||||
; AVX1-NEXT: andl $7, %edx
|
||||
; AVX1-NEXT: andl $7, %ecx
|
||||
; AVX1-NEXT: andl $7, %r8d
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: movslq %r9d, %r8
|
||||
; AVX1-NEXT: movslq 16(%rbp), %rdi
|
||||
; AVX1-NEXT: movslq 24(%rbp), %rcx
|
||||
; AVX1-NEXT: andl $7, %r9d
|
||||
; AVX1-NEXT: movl 16(%rbp), %r10d
|
||||
; AVX1-NEXT: andl $7, %r10d
|
||||
; AVX1-NEXT: movl 24(%rbp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
|
@ -284,15 +322,23 @@ define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0
|
|||
define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
|
||||
; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: movslq %edi, %rax
|
||||
; ALL-NEXT: movslq %esi, %rsi
|
||||
; ALL-NEXT: movslq %edx, %rdx
|
||||
; ALL-NEXT: movslq %ecx, %r11
|
||||
; ALL-NEXT: movslq %r8d, %r10
|
||||
; ALL-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; ALL-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; ALL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; ALL-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; ALL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; ALL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; ALL-NEXT: andl $3, %edi
|
||||
; ALL-NEXT: andl $3, %esi
|
||||
; ALL-NEXT: andl $3, %edx
|
||||
; ALL-NEXT: andl $3, %ecx
|
||||
; ALL-NEXT: andl $3, %r8d
|
||||
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; ALL-NEXT: movslq %r9d, %r8
|
||||
; ALL-NEXT: movslq {{[0-9]+}}(%rsp), %rdi
|
||||
; ALL-NEXT: movslq {{[0-9]+}}(%rsp), %rcx
|
||||
; ALL-NEXT: andl $3, %r9d
|
||||
; ALL-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; ALL-NEXT: andl $3, %r10d
|
||||
; ALL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; ALL-NEXT: andl $3, %eax
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
|
@ -331,48 +377,64 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
|
|||
; AVX1-NEXT: movq %rsp, %rbp
|
||||
; AVX1-NEXT: andq $-32, %rsp
|
||||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: movslq 32(%rbp), %rax
|
||||
; AVX1-NEXT: movl 32(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movslq 40(%rbp), %rax
|
||||
; AVX1-NEXT: movl 40(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 48(%rbp), %rax
|
||||
; AVX1-NEXT: movl 48(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 56(%rbp), %rax
|
||||
; AVX1-NEXT: movl 56(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 64(%rbp), %rax
|
||||
; AVX1-NEXT: movl 64(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 72(%rbp), %rax
|
||||
; AVX1-NEXT: movl 72(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 80(%rbp), %rax
|
||||
; AVX1-NEXT: movl 80(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq 88(%rbp), %rax
|
||||
; AVX1-NEXT: movl 88(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq %edi, %rax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: andl $15, %edi
|
||||
; AVX1-NEXT: movzwl (%rsp,%rdi,2), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: movslq %esi, %rax
|
||||
; AVX1-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %edx, %rax
|
||||
; AVX1-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %ecx, %rax
|
||||
; AVX1-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %r8d, %rax
|
||||
; AVX1-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %r9d, %rax
|
||||
; AVX1-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq 16(%rbp), %rax
|
||||
; AVX1-NEXT: andl $15, %esi
|
||||
; AVX1-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $15, %edx
|
||||
; AVX1-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $15, %ecx
|
||||
; AVX1-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $15, %r8d
|
||||
; AVX1-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $15, %r9d
|
||||
; AVX1-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movl 16(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq 24(%rbp), %rax
|
||||
; AVX1-NEXT: movl 24(%rbp), %eax
|
||||
; AVX1-NEXT: andl $15, %eax
|
||||
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
|
@ -386,48 +448,64 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
|
|||
; AVX2-NEXT: movq %rsp, %rbp
|
||||
; AVX2-NEXT: andq $-32, %rsp
|
||||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; AVX2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; AVX2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: movslq 32(%rbp), %rax
|
||||
; AVX2-NEXT: movl 32(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movslq 40(%rbp), %rax
|
||||
; AVX2-NEXT: movl 40(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 48(%rbp), %rax
|
||||
; AVX2-NEXT: movl 48(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 56(%rbp), %rax
|
||||
; AVX2-NEXT: movl 56(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 64(%rbp), %rax
|
||||
; AVX2-NEXT: movl 64(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 72(%rbp), %rax
|
||||
; AVX2-NEXT: movl 72(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 80(%rbp), %rax
|
||||
; AVX2-NEXT: movl 80(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq 88(%rbp), %rax
|
||||
; AVX2-NEXT: movl 88(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq %edi, %rax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: andl $15, %edi
|
||||
; AVX2-NEXT: movzwl (%rsp,%rdi,2), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: movslq %esi, %rax
|
||||
; AVX2-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %edx, %rax
|
||||
; AVX2-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %ecx, %rax
|
||||
; AVX2-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %r8d, %rax
|
||||
; AVX2-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %r9d, %rax
|
||||
; AVX2-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq 16(%rbp), %rax
|
||||
; AVX2-NEXT: andl $15, %esi
|
||||
; AVX2-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $15, %edx
|
||||
; AVX2-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $15, %ecx
|
||||
; AVX2-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $15, %r8d
|
||||
; AVX2-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $15, %r9d
|
||||
; AVX2-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movl 16(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq 24(%rbp), %rax
|
||||
; AVX2-NEXT: movl 24(%rbp), %eax
|
||||
; AVX2-NEXT: andl $15, %eax
|
||||
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
|
@ -472,48 +550,64 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
|
|||
define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
|
||||
; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movslq %edi, %rax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: andl $7, %edi
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rdi,2), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: movslq %esi, %rax
|
||||
; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %edx, %rax
|
||||
; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %ecx, %rax
|
||||
; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %r8d, %rax
|
||||
; AVX1-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq %r9d, %rax
|
||||
; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: andl $7, %esi
|
||||
; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $7, %edx
|
||||
; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $7, %ecx
|
||||
; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $7, %r8d
|
||||
; AVX1-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: andl $7, %r9d
|
||||
; AVX1-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX1-NEXT: andl $7, %eax
|
||||
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
|
@ -521,48 +615,64 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
|
||||
; AVX2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
|
||||
; AVX2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
|
||||
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
|
||||
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movslq %edi, %rax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: andl $7, %edi
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: movslq %esi, %rax
|
||||
; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %edx, %rax
|
||||
; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %ecx, %rax
|
||||
; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %r8d, %rax
|
||||
; AVX2-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq %r9d, %rax
|
||||
; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: andl $7, %esi
|
||||
; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $7, %edx
|
||||
; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $7, %ecx
|
||||
; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $7, %r8d
|
||||
; AVX2-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: andl $7, %r9d
|
||||
; AVX2-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
|
||||
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: andl $7, %eax
|
||||
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
|
@ -615,8 +725,12 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
|
|||
; AVX1-NEXT: subq $64, %rsp
|
||||
; AVX1-NEXT: movq (%rdi), %rax
|
||||
; AVX1-NEXT: movq 8(%rdi), %rcx
|
||||
; AVX1-NEXT: andl $3, %eax
|
||||
; AVX1-NEXT: andl $3, %ecx
|
||||
; AVX1-NEXT: movq 16(%rdi), %rdx
|
||||
; AVX1-NEXT: andl $3, %edx
|
||||
; AVX1-NEXT: movq 24(%rdi), %rsi
|
||||
; AVX1-NEXT: andl $3, %esi
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -637,8 +751,12 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
|
|||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: movq (%rdi), %rax
|
||||
; AVX2-NEXT: movq 8(%rdi), %rcx
|
||||
; AVX2-NEXT: andl $3, %eax
|
||||
; AVX2-NEXT: andl $3, %ecx
|
||||
; AVX2-NEXT: movq 16(%rdi), %rdx
|
||||
; AVX2-NEXT: andl $3, %edx
|
||||
; AVX2-NEXT: movq 24(%rdi), %rsi
|
||||
; AVX2-NEXT: andl $3, %esi
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -674,8 +792,12 @@ define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwi
|
|||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: movq (%rdi), %rax
|
||||
; AVX1-NEXT: movq 8(%rdi), %rcx
|
||||
; AVX1-NEXT: andl $1, %eax
|
||||
; AVX1-NEXT: andl $1, %ecx
|
||||
; AVX1-NEXT: movq 16(%rdi), %rdx
|
||||
; AVX1-NEXT: andl $1, %edx
|
||||
; AVX1-NEXT: movq 24(%rdi), %rsi
|
||||
; AVX1-NEXT: andl $1, %esi
|
||||
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
@ -690,8 +812,12 @@ define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwi
|
|||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movq (%rdi), %rax
|
||||
; AVX2-NEXT: movq 8(%rdi), %rcx
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: movq 16(%rdi), %rdx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: movq 24(%rdi), %rsi
|
||||
; AVX2-NEXT: andl $1, %esi
|
||||
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
|
|
Loading…
Reference in New Issue