diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 9ca7bf67e6d5..621c4cefe09d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -279,6 +279,9 @@ public: LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorExtractVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index a7d11d9c6c8f..b56d1a0b3f59 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3472,6 +3472,59 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowVecTy) { + assert(TypeIdx == 1 && "not a vector type index"); + + // TODO: Handle total scalarization case. + if (!NarrowVecTy.isVector()) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT VecTy = MRI.getType(SrcVec); + + // If the index is a constant, we can really break this down as you would + // expect, and index into the target size pieces. + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + // Avoid out of bounds indexing the pieces. + if (IdxVal >= VecTy.getNumElements()) { + MIRBuilder.buildUndef(DstReg); + MI.eraseFromParent(); + return Legalized; + } + + SmallVector<Register, 8> VecParts; + LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); + + // Build a sequence of NarrowTy pieces in VecParts for this operand. 
+ buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); + + unsigned NewNumElts = NarrowVecTy.getNumElements(); + + LLT IdxTy = MRI.getType(Idx); + int64_t PartIdx = IdxVal / NewNumElts; + auto NewIdx = + MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); + + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + MI.eraseFromParent(); + return Legalized; + } + + // With a variable index, we can't perform the extract in a smaller type, so + // we're forced to expand this. + // + // TODO: We could emit a chain of compare/select to figure out which piece to + // index. + return lowerExtractVectorElt(MI); +} + LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -3801,6 +3854,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); + case G_EXTRACT_VECTOR_ELT: + return fewerElementsVectorExtractVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 237d4595e10f..f84f58a6b5de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1340,7 +1340,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampScalar(EltTypeIdx, S32, S64) .clampScalar(VecTypeIdx, S32, S64) .clampScalar(IdxTypeIdx, S32, S32) - // TODO: Clamp the number of elements before resorting to stack lowering. + .clampMaxNumElements(1, S32, 32) + // TODO: Clamp elements for 64-bit vectors? // It should only be necessary with variable indexes. 
// As a last resort, lower to the stack .lower(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll new file mode 100644 index 000000000000..d2d9bea66089 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -0,0 +1,861 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; Check lowering of some large extractelement that use the stack +; instead of register indexing. + +define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v64i32_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v15, v0 +; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v16, v1 +; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc +; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 +; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; 
GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16 +; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc +; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off +; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off +; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GCN-NEXT: v_add_u32_e32 v1, 16, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: 
buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: 
buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 20, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 24, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 28, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 32, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 36, v0 +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 40, v0 +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 44, v0 +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 48, v0 +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 52, v0 +; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 56, v0 +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen +; GCN-NEXT: 
v_add_u32_e32 v1, 60, v0 +; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 64, v0 +; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 +; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 +; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 +; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 +; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 +; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 +; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 +; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 +; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 +; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 +; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 +; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 +; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 +; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 +; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v38, 
v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 +; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 +; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 +; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v15 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v9, v16 +; 
GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v10, v17 +; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v18 +; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 +; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, 
off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 8, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 12, v0 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 +; GCN-NEXT: v_add_u32_e32 v3, 0xd4, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xd8, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0xdc, v0 +; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v52, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v53, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v54, v5, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0 +; GCN-NEXT: v_add_u32_e32 v3, 0xe4, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xe8, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0xec, v0 +; GCN-NEXT: v_add_u32_e32 v6, 0xf0, v0 +; GCN-NEXT: v_add_u32_e32 v7, 0xf4, v0 +; GCN-NEXT: v_add_u32_e32 v8, 0xf8, v0 +; GCN-NEXT: v_add_u32_e32 v9, 0xfc, v0 +; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v56, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v57, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v58, v5, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v59, v6, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v60, v7, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v61, v8, s[0:3], 0 
offen +; GCN-NEXT: buffer_store_dword v62, v9, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v1, 63, v2 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 %idx + ret i32 %elt +} + +define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v128i16_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v15, v0 +; GCN-NEXT: s_add_u32 s4, s32, 
0x3fc0 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v16, v1 +; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc +; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 +; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off 
offset:16 +; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc +; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off +; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off +; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GCN-NEXT: v_add_u32_e32 v1, 16, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 
4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 +; GCN-NEXT: global_load_dwordx4 
v[59:62], v[59:60], off offset:48 +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 20, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 24, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 28, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 32, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 36, v0 +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 40, v0 +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 44, v0 +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 48, v0 +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 52, v0 +; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 56, v0 +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 60, v0 +; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 64, v0 +; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 +; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 +; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 +; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 +; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 +; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 +; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 
offen +; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 +; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 +; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 +; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 +; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 +; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 +; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 +; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 +; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 +; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 +; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 +; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v15 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v9, v16 +; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v10, v17 +; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v18 +; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 +; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 8, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 12, v0 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: v_lshrrev_b32_e32 v1, 
1, v2 +; GCN-NEXT: v_and_b32_e32 v1, 63, v1 +; GCN-NEXT: v_add_u32_e32 v3, 0xd0, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xd4, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0xd8, v0 +; GCN-NEXT: v_add_u32_e32 v6, 0xdc, v0 +; GCN-NEXT: buffer_store_dword v51, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v52, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v53, v5, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v54, v6, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v3, 0xe0, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GCN-NEXT: v_add_u32_e32 v4, 0xe4, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0xe8, v0 +; GCN-NEXT: v_add_u32_e32 v6, 0xec, v0 +; GCN-NEXT: v_add_u32_e32 v7, 0xf0, v0 +; GCN-NEXT: v_add_u32_e32 v8, 0xf4, v0 +; GCN-NEXT: v_add_u32_e32 v9, 0xf8, v0 +; GCN-NEXT: v_add_u32_e32 v10, 0xfc, v0 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_store_dword v55, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v56, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v57, v5, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v58, v6, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v59, v7, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v60, v8, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v61, v9, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v62, v10, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 
4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: v_and_b32_e32 v1, 1, v2 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <128 x i16>, <128 x i16> addrspace(1)* %ptr + %elt = extractelement <128 x i16> %vec, i32 %idx + ret i16 %elt +} + +define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v32i64_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v15, v0 +; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v16, v1 +; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc +; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 +; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v12, s5 +; GCN-NEXT: v_mov_b32_e32 v11, s4 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: 
buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16 +; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc +; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off +; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off +; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GCN-NEXT: 
v_add_u32_e32 v1, 16, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], 
s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 24, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 20, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 28, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 36, v0 +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 44, v0 +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 32, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 40, v0 +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 
offen +; GCN-NEXT: v_add_u32_e32 v1, 48, v0 +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 56, v0 +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 52, v0 +; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 60, v0 +; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 +; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 64, v0 +; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 +; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 +; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 +; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 +; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 +; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 +; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 +; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 +; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 +; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 +; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 +; GCN-NEXT: 
buffer_store_dword v36, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 +; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 +; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 +; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 +; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 +; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 
offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v15 +; GCN-NEXT: v_mov_b32_e32 v9, v16 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v18 +; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v10, v17 +; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 +; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 
; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 8, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 12, v0 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xd8, v0 +; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v53, v4, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v3, 0xd4, v0 +; GCN-NEXT: v_add_u32_e32 v6, 0xe0, v0 +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xf8, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0xdc, v0 +; GCN-NEXT: v_add_u32_e32 v7, 0xe4, v0 +; GCN-NEXT: v_add_u32_e32 v8, 0xe8, v0 +; GCN-NEXT: v_add_u32_e32 v10, 0xf0, v0 +; GCN-NEXT: buffer_store_dword v55, v6, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v57, v8, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v59, v10, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v61, v4, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v9, 0xec, 
v0 +; GCN-NEXT: v_add_u32_e32 v4, 0xfc, v0 +; GCN-NEXT: buffer_store_dword v52, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v54, v5, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v56, v7, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v58, v9, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v62, v4, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v1, 31, v2 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; 
GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <32 x i64>, <32 x i64> addrspace(1)* %ptr + %elt = extractelement <32 x i64> %vec, i32 %idx + ret i64 %elt +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 4b78c605e0b7..6274f055fa27 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2495,3 +2495,125 @@ entry: store double %ext, double addrspace(1)* %out ret void } + +define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_7: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_7: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v7 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 7 + ret i32 %elt +} + +define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_32: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_32: +; MOVREL: ; %bb.0: 
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 32 + ret i32 %elt +} + +define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_33: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_33: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v1 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 33 + ret i32 %elt +} + +define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_37: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 
s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v5 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_37: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v5 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 37 + ret i32 %elt +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index b548ff550343..f3c82289c239 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -1408,204 +1408,8 @@ body: | ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK: G_STORE [[UV1]](s32), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 8 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK: G_STORE [[UV2]](s32), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: G_STORE [[UV3]](s32), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: G_STORE [[UV4]](s32), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: G_STORE [[UV5]](s32), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: G_STORE [[UV6]](s32), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY1]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: G_STORE [[UV8]](s32), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: G_STORE [[UV9]](s32), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK: 
[[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: G_STORE [[UV10]](s32), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: G_STORE [[UV11]](s32), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: G_STORE [[UV12]](s32), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: G_STORE [[UV13]](s32), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5) - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: G_STORE [[UV14]](s32), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: G_STORE [[UV15]](s32), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: G_STORE [[UV16]](s32), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: G_STORE [[UV17]](s32), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) - ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C20]](s32) - ; CHECK: G_STORE [[UV18]](s32), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: G_STORE [[UV19]](s32), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) - ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: G_STORE [[UV20]](s32), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) - ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: G_STORE [[UV21]](s32), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) - ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: G_STORE [[UV22]](s32), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) - ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: G_STORE [[UV23]](s32), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5) - ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: G_STORE [[UV24]](s32), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) - ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: G_STORE [[UV25]](s32), [[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) - ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: G_STORE 
[[UV26]](s32), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) - ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: G_STORE [[UV27]](s32), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) - ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: G_STORE [[UV28]](s32), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) - ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: G_STORE [[UV29]](s32), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) - ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: G_STORE [[UV30]](s32), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) - ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) - ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) - ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD35]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) - ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD36]](p5) :: 
(store 4 into %stack.0 + 136, align 256, addrspace 5) - ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) - ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) - ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) - ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) - ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) - ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: G_STORE [[UV40]](s32), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) - ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: G_STORE [[UV41]](s32), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) - ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: G_STORE [[UV42]](s32), [[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 
256, addrspace 5) - ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: G_STORE [[UV43]](s32), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) - ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: G_STORE [[UV44]](s32), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) - ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: G_STORE [[UV45]](s32), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) - ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: G_STORE [[UV46]](s32), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) - ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: G_STORE [[UV47]](s32), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) - ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: G_STORE [[UV48]](s32), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) - ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: G_STORE [[UV49]](s32), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) - ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: G_STORE [[UV50]](s32), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) - ; CHECK: 
[[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: G_STORE [[UV51]](s32), [[PTR_ADD53]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5) - ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: G_STORE [[UV52]](s32), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) - ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: G_STORE [[UV53]](s32), [[PTR_ADD55]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5) - ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: G_STORE [[UV54]](s32), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) - ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: G_STORE [[UV55]](s32), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5) - ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: G_STORE [[UV56]](s32), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) - ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: G_STORE [[UV57]](s32), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) - ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: G_STORE [[UV58]](s32), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) - ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 
236 - ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: G_STORE [[UV59]](s32), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5) - ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: G_STORE [[UV60]](s32), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) - ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: G_STORE [[UV61]](s32), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) - ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: G_STORE [[UV62]](s32), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) - ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: G_STORE [[UV63]](s32), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.0 + 28, addrspace 5) - ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224 + ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) @@ -1613,6 +1417,280 @@ body: | S_ENDPGM 0, implicit %3 ... 
+--- +name: extract_vector_elt_33_v64s32 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_33_v64s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](<16 x s32>), 32 + ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 33 + %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +# Test handling of out of bounds indexes +--- +name: extract_vector_elt_64_65_v64s32 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_64_65_v64s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s32), implicit [[DEF]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 64 + %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_CONSTANT i32 65 + %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %4 + S_ENDPGM 0, implicit %3, implicit %5 +... + +--- +name: extract_vector_elt_33_v64p3 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), 
[[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD]](<16 x p3>) + ; CHECK: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD1]](<16 x p3>) + ; CHECK: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD2]](<16 x p3>) + ; CHECK: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD3]](<16 x p3>) + ; CHECK: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) + ; CHECK: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) + ; CHECK: G_STORE 
[[UV2]](p3), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) + ; CHECK: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) + ; CHECK: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) + ; CHECK: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) + ; CHECK: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) + ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) + ; CHECK: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) + ; CHECK: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) + ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) + ; CHECK: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) + ; CHECK: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) + ; 
CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) + ; CHECK: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) + ; CHECK: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) + ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) + ; CHECK: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5) + ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) + ; CHECK: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) + ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) + ; CHECK: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) + ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) + ; CHECK: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) + ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 + ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) + ; CHECK: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) + ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 + ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) + ; CHECK: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) + ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 + ; CHECK: 
[[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) + ; CHECK: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) + ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 + ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) + ; CHECK: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) + ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 + ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) + ; CHECK: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) + ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 + ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) + ; CHECK: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) + ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 + ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) + ; CHECK: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5) + ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 + ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) + ; CHECK: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) + ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) + ; CHECK: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) + ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 + ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) + ; CHECK: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) + ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 + ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], 
[[C29]](s32) + ; CHECK: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) + ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 + ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) + ; CHECK: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) + ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 + ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) + ; CHECK: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) + ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 + ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) + ; CHECK: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) + ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 + ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) + ; CHECK: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) + ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) + ; CHECK: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) + ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 + ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) + ; CHECK: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) + ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) + ; CHECK: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5) + ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 + ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C37]](s32) + ; CHECK: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) + ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) + ; CHECK: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) + ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 + ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) + ; CHECK: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) + ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 + ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) + ; CHECK: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) + ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 + ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) + ; CHECK: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) + ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 + ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) + ; CHECK: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) + ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 + ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) + ; CHECK: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) + ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 + ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) + ; CHECK: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5) + ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 + ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) + ; CHECK: 
G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) + ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 + ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) + ; CHECK: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) + ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 + ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) + ; CHECK: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) + ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 + ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) + ; CHECK: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) + ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 + ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) + ; CHECK: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) + ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 + ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) + ; CHECK: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) + ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 + ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) + ; CHECK: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) + ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 + ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) + ; CHECK: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) + ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 + ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) + ; CHECK: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: 
(store 4 into %stack.0 + 204, align 256, addrspace 5) + ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 + ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) + ; CHECK: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) + ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 + ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) + ; CHECK: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5) + ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 + ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) + ; CHECK: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) + ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 + ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) + ; CHECK: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5) + ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 + ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) + ; CHECK: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) + ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 + ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) + ; CHECK: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) + ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 + ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) + ; CHECK: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) + ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 + ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) + ; CHECK: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 236, align 256, 
addrspace 5) + ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 + ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) + ; CHECK: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) + ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 + ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) + ; CHECK: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) + ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 + ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) + ; CHECK: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) + ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 + ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) + ; CHECK: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from %stack.0 + 132, addrspace 5) + ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](p3) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 33 + %2:_(<64 x p3>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(p3) = G_EXTRACT_VECTOR_ELT %2, %1 + S_ENDPGM 0, implicit %3 +... + --- name: extract_vector_elt_varidx_v64s32