forked from OSchip/llvm-project
[AMDGPU] Fix bitcast v4i64/v16i16
Fix a regression introduced in D128865.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D129375
This commit is contained in:
parent
f3939dc509
commit
2bd8e74b94
|
@@ -352,7 +352,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||||
// TODO: Generalize to more vector types.
|
// TODO: Generalize to more vector types.
|
||||||
setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
|
setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
|
||||||
{MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
|
{MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
|
||||||
MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16},
|
MVT::v4i16, MVT::v4f16},
|
||||||
Custom);
|
Custom);
|
||||||
|
|
||||||
// Deal with vec3 vector operations when widened to vec4.
|
// Deal with vec3 vector operations when widened to vec4.
|
||||||
|
|
|
@@ -1449,6 +1449,14 @@ def : BitConvert <v8i32, v16f16, VReg_256>;
|
||||||
def : BitConvert <v8i32, v16i16, VReg_256>;
|
def : BitConvert <v8i32, v16i16, VReg_256>;
|
||||||
def : BitConvert <v8f32, v16f16, VReg_256>;
|
def : BitConvert <v8f32, v16f16, VReg_256>;
|
||||||
def : BitConvert <v8f32, v16i16, VReg_256>;
|
def : BitConvert <v8f32, v16i16, VReg_256>;
|
||||||
|
def : BitConvert <v16f16, v4i64, VReg_256>;
|
||||||
|
def : BitConvert <v16i16, v4i64, VReg_256>;
|
||||||
|
def : BitConvert <v16f16, v4f64, VReg_256>;
|
||||||
|
def : BitConvert <v16i16, v4f64, VReg_256>;
|
||||||
|
def : BitConvert <v4i64, v16f16, VReg_256>;
|
||||||
|
def : BitConvert <v4i64, v16i16, VReg_256>;
|
||||||
|
def : BitConvert <v4f64, v16f16, VReg_256>;
|
||||||
|
def : BitConvert <v4f64, v16i16, VReg_256>;
|
||||||
|
|
||||||
// 512-bit bitcast
|
// 512-bit bitcast
|
||||||
def : BitConvert <v16i32, v16f32, VReg_512>;
|
def : BitConvert <v16i32, v16f32, VReg_512>;
|
||||||
|
|
|
@@ -310,3 +310,75 @@ define amdgpu_kernel void @bitcast_f32_to_v1i32(i32 addrspace(1)* %out) {
|
||||||
store i32 %v1, i32 addrspace(1)* %out
|
store i32 %v1, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}bitcast_v4i64_to_v16i16:
|
||||||
|
define amdgpu_kernel void @bitcast_v4i64_to_v16i16(i32 %cond, <16 x i16> addrspace(1)* %out, <4 x i64> %value) {
|
||||||
|
entry:
|
||||||
|
%cmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %cmp0, label %if, label %end
|
||||||
|
|
||||||
|
if:
|
||||||
|
%phi_value = phi <4 x i64> [zeroinitializer, %entry], [%value, %if]
|
||||||
|
%cast = bitcast <4 x i64> %phi_value to <16 x i16>
|
||||||
|
%cmp1 = icmp eq i32 %cond, 1
|
||||||
|
br i1 %cmp1, label %if, label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
%phi_cast = phi <16 x i16> [zeroinitializer, %entry], [%cast, %if]
|
||||||
|
store <16 x i16> %phi_cast, <16 x i16> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}bitcast_v4f64_to_v16f16:
|
||||||
|
define amdgpu_kernel void @bitcast_v4f64_to_v16f16(i32 %cond, <16 x half> addrspace(1)* %out, <4 x double> %value) {
|
||||||
|
entry:
|
||||||
|
%cmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %cmp0, label %if, label %end
|
||||||
|
|
||||||
|
if:
|
||||||
|
%phi_value = phi <4 x double> [zeroinitializer, %entry], [%value, %if]
|
||||||
|
%cast = bitcast <4 x double> %phi_value to <16 x half>
|
||||||
|
%cmp1 = icmp eq i32 %cond, 1
|
||||||
|
br i1 %cmp1, label %if, label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
%phi_cast = phi <16 x half> [zeroinitializer, %entry], [%cast, %if]
|
||||||
|
store <16 x half> %phi_cast, <16 x half> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}bitcast_v16i16_to_v4i64:
|
||||||
|
define amdgpu_kernel void @bitcast_v16i16_to_v4i64(i32 %cond, <4 x i64> addrspace(1)* %out, <16 x i16> %value) {
|
||||||
|
entry:
|
||||||
|
%cmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %cmp0, label %if, label %end
|
||||||
|
|
||||||
|
if:
|
||||||
|
%phi_value = phi <16 x i16> [zeroinitializer, %entry], [%value, %if]
|
||||||
|
%cast = bitcast <16 x i16> %phi_value to <4 x i64>
|
||||||
|
%cmp1 = icmp eq i32 %cond, 1
|
||||||
|
br i1 %cmp1, label %if, label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
%phi_cast = phi <4 x i64> [zeroinitializer, %entry], [%cast, %if]
|
||||||
|
store <4 x i64> %phi_cast, <4 x i64> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}bitcast_v16f16_to_v4f64:
|
||||||
|
define amdgpu_kernel void @bitcast_v16f16_to_v4f64(i32 %cond, <4 x double> addrspace(1)* %out, <16 x half> %value) {
|
||||||
|
entry:
|
||||||
|
%cmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %cmp0, label %if, label %end
|
||||||
|
|
||||||
|
if:
|
||||||
|
%phi_value = phi <16 x half> [zeroinitializer, %entry], [%value, %if]
|
||||||
|
%cast = bitcast <16 x half> %phi_value to <4 x double>
|
||||||
|
%cmp1 = icmp eq i32 %cond, 1
|
||||||
|
br i1 %cmp1, label %if, label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
%phi_cast = phi <4 x double> [zeroinitializer, %entry], [%cast, %if]
|
||||||
|
store <4 x double> %phi_cast, <4 x double> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue