From 08a4e2045da212ae0a2e0425a18528479e484e9a Mon Sep 17 00:00:00 2001
From: Scott Michel
Date: Mon, 1 Dec 2008 17:56:02 +0000
Subject: [PATCH] CellSPU:

- Fix v2[if]64 vector insertion code before IBM files a bug report.
- Ensure that zero (0) offsets relative to $sp don't trip an assert
  (add $sp, 0 gets legalized to $sp alone, tripping an assert)
- Shuffle masks passed to SPUISD::SHUFB are now v16i8 or v4i32

llvm-svn: 60358
---
 llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp |  4 +-
 llvm/lib/Target/CellSPU/SPUISelLowering.cpp | 76 ++++++++---------
 llvm/lib/Target/CellSPU/SPUInstrInfo.td     | 90 +++++++++------------
 llvm/test/CodeGen/CellSPU/vecinsert.ll      | 81 +++++++++++++++++--
 4 files changed, 153 insertions(+), 98 deletions(-)

diff --git a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index a2144d3e41b3..8d65cf99518a 100644
--- a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -593,8 +593,8 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
       && !SelectDFormAddr(Op, N, Base, Index)) {
     // If the address is neither A-form or D-form, punt and use an X-form
     // address:
-    Base = N.getOperand(0);
-    Index = N.getOperand(1);
+    Base = N.getOperand(1);
+    Index = N.getOperand(0);
     return true;
   }

diff --git a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
index 184f0b23fa54..222c0d6145a9 100644
--- a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -759,12 +759,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
     }

     SDValue insertEltOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
+      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
     SDValue vectorizeOp =
       DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

-    result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
-                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+    result = DAG.getNode(SPUISD::SHUFB, vecVT,
+                         vectorizeOp, alignLoadVec,
+                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

     result = DAG.getStore(the_chain, result, basePtr,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
@@ -885,10 +886,10 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 static SDValue
 LowerConstant(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
-  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

   if (VT == MVT::i64) {
-    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
   } else {
@@ -906,15 +907,18 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) {
 static SDValue
 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
-  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
-  assert((FP != 0) &&
-         "LowerConstantFP: Node is not ConstantFPSDNode");

   if (VT == MVT::f64) {
+    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+    assert((FP != 0) &&
+           "LowerConstantFP: Node is not ConstantFPSDNode");
+
     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
-    return DAG.getNode(ISD::BIT_CONVERT, VT,
-                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
+    SDValue T = DAG.getConstant(dbits, MVT::i64);
+    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
   }

   return SDValue();
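
In the two hunks above, an f64 constant no longer round-trips through LowerConstant via an i64 BIT_CONVERT; the splatted v2i64 is built directly, bitcast to v2f64, and the preferred slot is read back out. A minimal stand-alone sketch of that bit-level round trip (plain C++ with hypothetical names, not the SelectionDAG API):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Splat one double's bit pattern across a two-element "vector", then
    // read the preferred (first) slot back, mirroring BUILD_VECTOR v2i64,
    // BIT_CONVERT v2f64, and VEC2PREFSLOT.
    int main() {
      double d = 1.0;
      uint64_t dbits;
      std::memcpy(&dbits, &d, sizeof dbits);            // DoubleToBits
      uint64_t vec[2] = { dbits, dbits };               // BUILD_VECTOR T, T
      double prefslot;
      std::memcpy(&prefslot, &vec[0], sizeof prefslot); // VEC2PREFSLOT
      assert(prefslot == d);
      return 0;
    }
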
@@ -1793,7 +1797,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
     // Copy register's contents as index in SHUFFLE_MASK:
     SDValue ShufMaskOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
+      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                   DAG.getTargetConstant(V2Elt, MVT::i32),
                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
     // Use shuffle mask in SHUFB synthetic instruction:
@@ -1818,7 +1822,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     }

     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
-                                     &ResultMask[0], ResultMask.size());
+                                    &ResultMask[0], ResultMask.size());
     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
   }
 }
@@ -2165,7 +2169,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     if (scaleShift > 0) {
       // Scale the shift factor:
       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
-              DAG.getConstant(scaleShift, MVT::i32));
+                        DAG.getConstant(scaleShift, MVT::i32));
     }

     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
@@ -2209,7 +2213,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     }

     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
+                         DAG.getNode(SPUISD::SHUFB, VecVT,
+                                     vecShift, vecShift, replicate));
   }

   return retval;
@@ -2225,18 +2230,17 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  // Use $2 because it's always 16-byte aligned and it's available:
-  SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
+  // Use $sp ($1) because it's always 16-byte aligned and it's available:
+  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                DAG.getRegister(SPU::R1, PtrVT),
+                                DAG.getConstant(CN->getSExtValue(), PtrVT));
+  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);

   SDValue result =
     DAG.getNode(SPUISD::SHUFB, VT,
                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
-                VecOp,
-                DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
-                            DAG.getNode(ISD::ADD, PtrVT,
-                                        PtrBase,
-                                        DAG.getConstant(CN->getZExtValue(),
-                                                        PtrVT))));
+                VecOp,
+                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

   return result;
 }
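
Two things happen in the LowerINSERT_VECTOR_ELT hunk above. First, the control-mask address is now a SPUISD::IndirectAddr off $sp ($1) instead of an ISD::ADD off $2: a plain add with a zero offset gets legalized to the bare register (the commit message's "add $sp, 0" case), which loses the reg+imm shape the D-form address selector asserts on, while the target-specific node keeps it. Second, the SHUFFLE_MASK result is bitcast to v4i32 before feeding SHUFB, matching the new patterns in SPUInstrInfo.td. A byte-level model of the resulting insert (illustrative C++ only; it assumes the usual shufb conventions, selector bytes 0x00-0x0f from $rA and 0x10-0x1f from $rB, with a word element sitting in $rA's preferred slot):

    #include <cstdint>
    #include <cstring>

    // shufb-based word insert at element idx (0..3): $rA holds the new
    // element in its preferred slot, $rB the original vector; mask bytes
    // 0x10+i pass $rB through, and the insertion slot is redirected to
    // $rA's bytes 0..3 (the kind of mask cwd generates).
    void insertWord(uint8_t vec[16], uint32_t val, unsigned idx) {
      uint8_t rA[16] = {0}, mask[16], out[16];
      std::memcpy(rA, &val, 4);             // SCALAR_TO_VECTOR: preferred slot
      for (unsigned i = 0; i < 16; ++i)
        mask[i] = 0x10 + i;                 // identity over $rB
      for (unsigned i = 0; i < 4; ++i)
        mask[idx * 4 + i] = i;              // splice in $rA's word
      for (unsigned i = 0; i < 16; ++i)
        out[i] = (mask[i] & 0x10) ? vec[mask[i] & 0x0f] : rA[mask[i] & 0x0f];
      std::memcpy(vec, out, 16);
    }
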
@@ -2901,8 +2905,10 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
 #endif
   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
-  SDValue Result;                      // Initially, NULL result
+  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
+  MVT NodeVT = N->getValueType(0);     // The node's value type
+  MVT Op0VT = Op0.getValueType();      // The first operand's result
+  SDValue Result;                      // Initially, empty result

   switch (N->getOpcode()) {
   default: break;
@@ -2918,14 +2924,13 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
       ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
       SDValue combinedConst =
-        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                        Op0.getValueType());
+        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

       DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                  << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
       DEBUG(cerr << "With:    (SPUindirect <arg>, "
                  << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-      return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
+      return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                          Op0.getOperand(0), combinedConst);
     }
   } else if (isa<ConstantSDNode>(Op0)
@@ -2938,8 +2943,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
       ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
       SDValue combinedConst =
-        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                        Op0.getValueType());
+        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

       DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                  << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
@@ -2955,8 +2959,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: {
-    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
-        N->getValueType(0) == Op0.getValueType()) {
+    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
       // (any_extend (SPUextract_elt0 <arg>)) ->
       // (SPUextract_elt0 <arg>)
       // Types must match, however...
@@ -3000,7 +3003,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     if (isa<ConstantSDNode>(Op1)) {
       // Kill degenerate vector shifts:
       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-
       if (CN->getZExtValue() == 0) {
         Result = Op0;
       }
@@ -3014,20 +3016,20 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::SIGN_EXTEND: {
-      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
       // <arg>
       // but only if the SPUpromote_scalar and <arg> types match.
       SDValue Op00 = Op0.getOperand(0);
       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
         SDValue Op000 = Op00.getOperand(0);
-        if (Op000.getValueType() == N->getValueType(0)) {
+        if (Op000.getValueType() == NodeVT) {
           Result = Op000;
         }
       }
       break;
     }
     case SPUISD::VEC2PREFSLOT: {
-      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
       // <arg>
       Result = Op0.getOperand(0);
       break;
@@ -3037,7 +3039,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   }
   }
   // Otherwise, return unchanged.
-#ifdef NDEBUG
+#ifndef NDEBUG
   if (Result.getNode()) {
     DEBUG(cerr << "\nReplace.SPU: ");
     DEBUG(N->dump(&DAG));
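
The #ifdef/#ifndef flip at the end of PerformDAGCombine fixes an inverted guard: NDEBUG is defined in release builds, so the replacement trace was previously compiled in exactly when asserts (and DEBUG output) were turned off. The corrected sense, in miniature:

    #include <iostream>

    // NDEBUG is defined by release builds; "#ifndef NDEBUG" therefore keeps
    // tracing code in asserts builds only, matching what DEBUG() expects.
    int main() {
    #ifndef NDEBUG
      std::cout << "asserts build: combine tracing enabled\n";
    #endif
      return 0;
    }
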
diff --git a/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/llvm/lib/Target/CellSPU/SPUInstrInfo.td
index 227b67255176..e72a1bbe5235 100644
--- a/llvm/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -269,52 +269,51 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
 // Generate Controls for Insertion:
 //===----------------------------------------------------------------------===//

-def CBD :
-    RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
-        "cbd\t$rT, $src", ShuffleOp,
-        [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cbd\t$rT, $src", ShuffleOp,
+    [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CBX : RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cbx\t$rT, $src", ShuffleOp,
     [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CHD : RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
     "chd\t$rT, $src", ShuffleOp,
     [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CHX : RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
     "chx\t$rT, $src", ShuffleOp,
     [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CWD : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
     "cwd\t$rT, $src", ShuffleOp,
     [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CWDf32 : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
-    "cwd\t$rT, $src", ShuffleOp,
-    [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWX : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cwx\t$rT, $src", ShuffleOp,
     [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CWXf32 : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cwd\t$rT, $src", ShuffleOp,
+    [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cwx\t$rT, $src", ShuffleOp,
     [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CDD : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
     "cdd\t$rT, $src", ShuffleOp,
     [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CDDf64 : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
-    "cdd\t$rT, $src", ShuffleOp,
-    [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDX : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cdx\t$rT, $src", ShuffleOp,
     [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CDXf64 : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cdd\t$rT, $src", ShuffleOp,
+    [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cdx\t$rT, $src", ShuffleOp,
     [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

@@ -1786,46 +1785,33 @@ class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
     RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
             IntegerOp, pattern>;

-class SHUFBVecInst<ValueType vectype>:
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
   SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
-            [(set (vectype VECREG:$rT), (SPUshuffle (vectype VECREG:$rA),
-                                                    (vectype VECREG:$rB),
-                                                    (vectype VECREG:$rC)))]>;
-
-// It's this pattern that's probably the most useful, since SPUISelLowering
-// methods create a v16i8 vector for $rC:
-class SHUFBVecPat1<ValueType vectype, ValueType masktype, Instruction inst>:
-  Pat<(SPUshuffle (vectype VECREG:$rA), (vectype VECREG:$rB),
-                  (masktype VECREG:$rC)),
-      (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+            [(set (resultvec VECREG:$rT),
+                  (SPUshuffle (resultvec VECREG:$rA),
+                              (resultvec VECREG:$rB),
+                              (maskvec VECREG:$rC)))]>;

 multiclass ShuffleBytes
 {
-  def v16i8 : SHUFBVecInst<v16i8>;
-  def v8i16 : SHUFBVecInst<v8i16>;
-  def v4i32 : SHUFBVecInst<v4i32>;
-  def v2i64 : SHUFBVecInst<v2i64>;
+  def v16i8     : SHUFBVecInst<v16i8, v16i8>;
+  def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+  def v8i16     : SHUFBVecInst<v8i16, v16i8>;
+  def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+  def v4i32     : SHUFBVecInst<v4i32, v16i8>;
+  def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+  def v2i64     : SHUFBVecInst<v2i64, v16i8>;
+  def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;

-  def v4f32 : SHUFBVecInst<v4f32>;
-  def v2f64 : SHUFBVecInst<v2f64>;
+  def v4f32     : SHUFBVecInst<v4f32, v16i8>;
+  def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+  def v2f64     : SHUFBVecInst<v2f64, v16i8>;
+  def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
 }

 defm SHUFB : ShuffleBytes;

-// Shuffle mask is a v16i8 vector
-def : SHUFBVecPat1<v8i16, v16i8, SHUFBv8i16>;
-def : SHUFBVecPat1<v4i32, v16i8, SHUFBv4i32>;
-def : SHUFBVecPat1<v2i64, v16i8, SHUFBv2i64>;
-def : SHUFBVecPat1<v4f32, v16i8, SHUFBv4f32>;
-def : SHUFBVecPat1<v2f64, v16i8, SHUFBv2f64>;
-
-// Shuffle mask is a v4i32 vector:
-def : SHUFBVecPat1<v16i8, v4i32, SHUFBv16i8>;
-def : SHUFBVecPat1<v8i16, v4i32, SHUFBv8i16>;
-def : SHUFBVecPat1<v2i64, v4i32, SHUFBv2i64>;
-def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4f32>;
-def : SHUFBVecPat1<v2f64, v4i32, SHUFBv2f64>;
-
 //===----------------------------------------------------------------------===//
 // Shift and rotate group:
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/CellSPU/vecinsert.ll b/llvm/test/CodeGen/CellSPU/vecinsert.ll
index 9864c5384939..726fe3f77c9b 100644
--- a/llvm/test/CodeGen/CellSPU/vecinsert.ll
+++ b/llvm/test/CodeGen/CellSPU/vecinsert.ll
@@ -1,12 +1,12 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep cbd %t1.s | count 3
-; RUN: grep chd %t1.s | count 3
-; RUN: grep cwd %t1.s | count 6
-; RUN: grep il %t1.s | count 4
-; RUN: grep ilh %t1.s | count 3
+; RUN: grep cbd %t1.s | count 5
+; RUN: grep chd %t1.s | count 5
+; RUN: grep cwd %t1.s | count 10
+; RUN: grep il %t1.s | count 15
+; RUN: grep ilh %t1.s | count 10
 ; RUN: grep iohl %t1.s | count 1
-; RUN: grep ilhu %t1.s | count 1
-; RUN: grep shufb %t1.s | count 12
+; RUN: grep ilhu %t1.s | count 4
+; RUN: grep shufb %t1.s | count 26
 ; RUN: grep 17219 %t1.s | count 1
 ; RUN: grep 22598 %t1.s | count 1
 ; RUN: grep -- -39 %t1.s | count 1
@@ -51,3 +51,70 @@ entry:
   %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
   ret <4 x i32> %tmp1.2
 }
+
+define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <16 x i8>* %a, i32 %i
+  %tmp2 = load <16 x i8>* %arrayidx
+  %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
+  %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
+  store <16 x i8> %tmp8, <16 x i8>* %arrayidx
+  ret void
+}
+
+define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <8 x i16>* %a, i32 %i
+  %tmp2 = load <8 x i16>* %arrayidx
+  %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
+  %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
+  store <8 x i16> %tmp8, <8 x i16>* %arrayidx
+  ret void
+}
+
+define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x i32>* %a, i32 %i
+  %tmp2 = load <4 x i32>* %arrayidx
+  %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
+  %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
+  store <4 x i32> %tmp8, <4 x i32>* %arrayidx
+  ret void
+}
+
+define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x float>* %a, i32 %i
+  %tmp2 = load <4 x float>* %arrayidx
+  %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
+  store <4 x float> %tmp8, <4 x float>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x double>* %a, i32 %i
+  %tmp2 = load <2 x double>* %arrayidx
+  %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
+  store <2 x double> %tmp3, <2 x double>* %arrayidx
+  ret void
+}
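
The new variable_* functions exercise a variable-indexed load/insert/store for every element type, including the v2i64 and v2f64 paths this patch fixes, and the updated grep counts absorb the extra cbd/chd/cwd/shufb traffic they emit. As a scalar reference, this is the memory effect variable_v4i32_1 must have (illustrative C++; the test itself only greps the generated assembly):

    #include <cassert>
    #include <cstdint>

    // Storing insertelement(insertelement(a[i], 1, 1), 2, 2) back to a[i],
    // spelled out element by element.
    void variable_v4i32_1(uint32_t a[][4], int i) {
      a[i][1] = 1;
      a[i][2] = 2;
    }

    int main() {
      uint32_t buf[2][4] = {{9, 9, 9, 9}, {9, 9, 9, 9}};
      variable_v4i32_1(buf, 1);
      assert(buf[1][0] == 9 && buf[1][1] == 1 &&
             buf[1][2] == 2 && buf[1][3] == 9);
      return 0;
    }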