forked from OSchip/llvm-project
[AMDGPU] Expand vector mulhu/mulhs
Differential revision: https://reviews.llvm.org/D26077
llvm-svn: 285684
This commit is contained in:
parent
d0a9d1499c
commit
8a89d3662a
|
@@ -359,6 +359,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
|
||||
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
|
||||
setOperationAction(ISD::MUL, VT, Expand);
|
||||
setOperationAction(ISD::MULHU, VT, Expand);
|
||||
setOperationAction(ISD::MULHS, VT, Expand);
|
||||
setOperationAction(ISD::OR, VT, Expand);
|
||||
setOperationAction(ISD::SHL, VT, Expand);
|
||||
setOperationAction(ISD::SRA, VT, Expand);
|
||||
|
|
|
@@ -156,3 +156,16 @@ define void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) {
|
|||
; store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
; ret void
|
||||
; }
|
||||
|
||||
; FUNC-LABEL: @scalarize_mulhs_4xi32
|
||||
; SI: v_mul_hi_i32
|
||||
; SI: v_mul_hi_i32
|
||||
; SI: v_mul_hi_i32
|
||||
; SI: v_mul_hi_i32
|
||||
|
||||
; Loads a <4 x i32> vector from %in, signed-divides every lane by the constant
; 53668, and stores the <4 x i32> result to %out.
; NOTE(review): presumably the constant divisor is what leads the backend to
; emit the four v_mul_hi_i32 instructions the check lines above expect, one per
; vector lane -- confirm against the lowering code.
define void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
|
||||
%2 = sdiv <4 x i32> %1, <i32 53668, i32 53668, i32 53668, i32 53668>
|
||||
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
|
@@ -145,3 +145,16 @@ define void @v_udiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) {
|
|||
store i32 %result.ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @scalarize_mulhu_4xi32
|
||||
; SI: v_mul_hi_u32
|
||||
; SI: v_mul_hi_u32
|
||||
; SI: v_mul_hi_u32
|
||||
; SI: v_mul_hi_u32
|
||||
|
||||
; Loads a <4 x i32> vector from %in, unsigned-divides every lane by the constant
; 53668, and stores the <4 x i32> result to %out.
; NOTE(review): presumably the constant divisor is what leads the backend to
; emit the four v_mul_hi_u32 instructions the check lines above expect, one per
; vector lane -- confirm against the lowering code.
define void @scalarize_mulhu_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
|
||||
%2 = udiv <4 x i32> %1, <i32 53668, i32 53668, i32 53668, i32 53668>
|
||||
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue