From 2bd8e74b94119b478ba36db336035234a9b834e6 Mon Sep 17 00:00:00 2001
From: Piotr Sobczak
Date: Mon, 11 Jul 2022 21:24:56 +0200
Subject: [PATCH] [AMDGPU] Fix bitcast v4i64/v16i16

Fix a regression introduced in D128865.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D129375
---
Review notes (not part of the commit message):

* SIISelLowering.cpp: in the SITargetLowering constructor, MVT::v16i16 and
  MVT::v16f16 are dropped from the list of types for which
  ISD::EXTRACT_VECTOR_ELT and ISD::INSERT_VECTOR_ELT are marked Custom.
  The remaining types in that list are unchanged.
* SIInstructions.td: eight `def : BitConvert` records are added directly
  before the "// 512-bit bitcast" comment.
* amdgcn.bitcast.ll: four amdgpu_kernel functions are appended, covering
  v4i64 -> v16i16, v4f64 -> v16f16, v16i16 -> v4i64 and v16f16 -> v4f64.
  Each one bitcasts a phi value inside an `if` block that can branch back
  to itself, then stores a phi of zeroinitializer and the cast result.
  Only FUNC-LABEL check lines are added for them.
* NOTE(review): the text after "From: Piotr Sobczak" and after every
  "BitConvert" is missing in this copy - presumably bracketed text was
  stripped during extraction. The BitConvert operands cannot be recovered
  from this file; restore them from the upstream revision before applying.

 llvm/lib/Target/AMDGPU/SIISelLowering.cpp  |  2 +-
 llvm/lib/Target/AMDGPU/SIInstructions.td   |  8 +++
 llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll | 72 ++++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 094d5cd58673..d16da2a8b86b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -352,7 +352,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   // TODO: Generalize to more vector types.
   setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
                      {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
-                      MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16},
+                      MVT::v4i16, MVT::v4f16},
                      Custom);
 
   // Deal with vec3 vector operations when widened to vec4.
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8972bce30dc6..ce8c03bb8d64 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1449,6 +1449,14 @@ def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
 
 // 512-bit bitcast
 def : BitConvert ;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 568e66b5756e..4ae7193fea4f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -310,3 +310,75 @@ define amdgpu_kernel void @bitcast_f32_to_v1i32(i32 addrspace(1)* %out) {
   store i32 %v1, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}bitcast_v4i64_to_v16i16:
+define amdgpu_kernel void @bitcast_v4i64_to_v16i16(i32 %cond, <16 x i16> addrspace(1)* %out, <4 x i64> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <4 x i64> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <4 x i64> %phi_value to <16 x i16>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <16 x i16> [zeroinitializer, %entry], [%cast, %if]
+  store <16 x i16> %phi_cast, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v4f64_to_v16f16:
+define amdgpu_kernel void @bitcast_v4f64_to_v16f16(i32 %cond, <16 x half> addrspace(1)* %out, <4 x double> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <4 x double> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <4 x double> %phi_value to <16 x half>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <16 x half> [zeroinitializer, %entry], [%cast, %if]
+  store <16 x half> %phi_cast, <16 x half> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v16i16_to_v4i64:
+define amdgpu_kernel void @bitcast_v16i16_to_v4i64(i32 %cond, <4 x i64> addrspace(1)* %out, <16 x i16> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <16 x i16> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <16 x i16> %phi_value to <4 x i64>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <4 x i64> [zeroinitializer, %entry], [%cast, %if]
+  store <4 x i64> %phi_cast, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v16f16_to_v4f64:
+define amdgpu_kernel void @bitcast_v16f16_to_v4f64(i32 %cond, <4 x double> addrspace(1)* %out, <16 x half> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <16 x half> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <16 x half> %phi_value to <4 x double>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <4 x double> [zeroinitializer, %entry], [%cast, %if]
+  store <4 x double> %phi_cast, <4 x double> addrspace(1)* %out
+  ret void
+}