forked from OSchip/llvm-project
R600/SI: Fix B64 VALU shifts on VI
SI only has the standard versions of the 64-bit VALU shift instructions; VI only has the REV (operand-swapped) versions. Tested-by: Michel Dänzer <michel.daenzer@amd.com> llvm-svn: 228037
This commit is contained in:
parent
690c5baa6d
commit
707a6d0c20
|
@ -2046,6 +2046,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
|||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHL_B64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHLREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_ASHR_I64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_ASHRREV_I64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHR_B64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHRREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
|
||||
case AMDGPU::S_BFE_U64:
|
||||
case AMDGPU::S_BFM_B64:
|
||||
|
|
|
@ -802,6 +802,7 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
|
|||
}
|
||||
|
||||
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
|
||||
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
|
||||
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
|
||||
|
||||
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
|
||||
|
|
|
@ -1805,6 +1805,20 @@ defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
|
|||
|
||||
} // End SubtargetPredicate = isSICI
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pseudo Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
|
||||
|
||||
; FUNC-LABEL: {{^}}s_rotl_i64:
|
||||
; SI-DAG: s_lshl_b64
|
||||
; SI-DAG: s_sub_i32
|
||||
; SI-DAG: s_lshr_b64
|
||||
; SI: s_or_b64
|
||||
; SI: s_endpgm
|
||||
; BOTH-LABEL: {{^}}s_rotl_i64:
|
||||
; BOTH-DAG: s_lshl_b64
|
||||
; BOTH-DAG: s_sub_i32
|
||||
; BOTH-DAG: s_lshr_b64
|
||||
; BOTH: s_or_b64
|
||||
; BOTH: s_endpgm
|
||||
define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
|
||||
entry:
|
||||
%0 = shl i64 %x, %y
|
||||
|
@ -17,13 +17,15 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_rotl_i64:
|
||||
; BOTH-LABEL: {{^}}v_rotl_i64:
|
||||
; SI-DAG: v_lshl_b64
|
||||
; SI-DAG: v_sub_i32
|
||||
; VI-DAG: v_lshlrev_b64
|
||||
; BOTH-DAG: v_sub_i32
|
||||
; SI: v_lshr_b64
|
||||
; SI: v_or_b32
|
||||
; SI: v_or_b32
|
||||
; SI: s_endpgm
|
||||
; VI: v_lshrrev_b64
|
||||
; BOTH: v_or_b32
|
||||
; BOTH: v_or_b32
|
||||
; BOTH: s_endpgm
|
||||
define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
|
||||
entry:
|
||||
%x = load i64 addrspace(1)* %xptr, align 8
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
|
||||
|
||||
; FUNC-LABEL: {{^}}s_rotr_i64:
|
||||
; SI-DAG: s_sub_i32
|
||||
; SI-DAG: s_lshr_b64
|
||||
; SI-DAG: s_lshl_b64
|
||||
; SI: s_or_b64
|
||||
; BOTH-LABEL: {{^}}s_rotr_i64:
|
||||
; BOTH-DAG: s_sub_i32
|
||||
; BOTH-DAG: s_lshr_b64
|
||||
; BOTH-DAG: s_lshl_b64
|
||||
; BOTH: s_or_b64
|
||||
define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
|
||||
entry:
|
||||
%tmp0 = sub i64 64, %y
|
||||
|
@ -16,12 +16,14 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_rotr_i64:
|
||||
; SI-DAG: v_sub_i32
|
||||
; BOTH-LABEL: {{^}}v_rotr_i64:
|
||||
; BOTH-DAG: v_sub_i32
|
||||
; SI-DAG: v_lshr_b64
|
||||
; SI-DAG: v_lshl_b64
|
||||
; SI: v_or_b32
|
||||
; SI: v_or_b32
|
||||
; VI-DAG: v_lshrrev_b64
|
||||
; VI-DAG: v_lshlrev_b64
|
||||
; BOTH: v_or_b32
|
||||
; BOTH: v_or_b32
|
||||
define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
|
||||
entry:
|
||||
%x = load i64 addrspace(1)* %xptr, align 8
|
||||
|
@ -34,7 +36,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_rotr_v2i64:
|
||||
; BOTH-LABEL: {{^}}s_rotr_v2i64:
|
||||
define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
|
||||
entry:
|
||||
%tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
|
||||
|
@ -45,7 +47,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_rotr_v2i64:
|
||||
; BOTH-LABEL: {{^}}v_rotr_v2i64:
|
||||
define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
|
||||
entry:
|
||||
%x = load <2 x i64> addrspace(1)* %xptr, align 8
|
||||
|
|
|
@ -66,7 +66,7 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
|
|||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
|
||||
|
@ -104,8 +104,8 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
|||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v2i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
|
||||
|
@ -165,10 +165,10 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
|
|||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v4i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
|
||||
|
|
|
@ -85,7 +85,7 @@ entry:
|
|||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_i64_2:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -128,8 +128,8 @@ entry:
|
|||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v2i64:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
|
||||
|
@ -197,10 +197,10 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
|
|||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v4i64:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
|
||||
|
|
Loading…
Reference in New Issue