From 35de877e8c3b6285c03987c6e89985ee3cdd30c0 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 13 Nov 2018 20:26:27 +0000 Subject: [PATCH] Fixed DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT i1 handling Legalizer used to request an ext load from i8 to i1 when promoting vector element type to i8. Fixed. Differential Revision: https://reviews.llvm.org/D54440 llvm-svn: 346795 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 9 ++++++ .../CodeGen/AMDGPU/extract_vector_dynelt.ll | 28 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 88abd84366a9..899a7be5d6e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1934,6 +1934,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + + // FIXME: This is to handle i1 vectors with elements promoted to i8. + // i1 vector handling needs general improvement. 
+ if (N->getValueType(0).bitsLT(EltVT)) { + SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0)); + } + return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll new file mode 100644 index 000000000000..b09bcad446be --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}bit4_extelt: +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 +; GCN-DAG: buffer_store_byte [[ZERO]], +; GCN-DAG: buffer_store_byte [[ONE]], +; GCN-DAG: buffer_store_byte [[ZERO]], +; GCN-DAG: buffer_store_byte [[ONE]], +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]] +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]] +define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel + %zext = zext i1 %ext to i32 + store i32 %zext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}bit128_extelt: +define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <128 x i1> , i32 %sel + %zext = zext i1 %ext to i32 + store i32 %zext, i32 addrspace(1)* %out + ret void +}