# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=GCN %s
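
# The RUN lines exercise the si-peephole-sdwa pass on VI (fiji) and GFX9
# (gfx900); expectations shared by both targets use the GFX89 and GCN
# prefixes. The three functions below cover VOP1, VOP2 and VOPC instructions.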
# GFX89-LABEL: {{^}}name: vop1_instructions
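# Each VOP1 operation in the body is wrapped in V_LSHRREV/V_LSHLREV-by-16
# pairs that the pass folds into SDWA dst/src selects. The V_CVT_U32_F32_sdwa
# checks carry one operand fewer than V_CVT_F32_I32_sdwa because
# V_CVT_U32_F32 should not take an omod operand in its SDWA form; the omod
# case of V_CVT_F32_I32 is only converted on gfx900, where SDWA encodes omod.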

# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec

# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec

# VI: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
# VI: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
# VI: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
# VI: %{{[0-9]+}} = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec

# GFX9: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
# GFX9: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
# GFX9: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
# GFX9: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec

---
name: vop1_instructions
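# The shift pairs in the body model extracting and reinserting 16-bit halves
# around each VOP1 operation; the pass is expected to rewrite them into the
# SDWA forms checked above.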
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: vgpr_32 }
  - { id: 28, class: vgpr_32 }
  - { id: 29, class: vgpr_32 }
  - { id: 30, class: vgpr_32 }
  - { id: 31, class: vgpr_32 }
  - { id: 32, class: vgpr_32 }
  - { id: 33, class: vgpr_32 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
  - { id: 36, class: vgpr_32 }
  - { id: 37, class: vgpr_32 }
  - { id: 38, class: vgpr_32 }
  - { id: 39, class: vgpr_32 }
  - { id: 40, class: vgpr_32 }
  - { id: 41, class: vgpr_32 }
  - { id: 42, class: vgpr_32 }
  - { id: 43, class: vgpr_32 }
  - { id: 44, class: vgpr_32 }
  - { id: 45, class: vgpr_32 }
  - { id: 46, class: vgpr_32 }
  - { id: 47, class: vgpr_32 }
  - { id: 48, class: vgpr_32 }
  - { id: 100, class: vgpr_32 }
body: |
  bb.0:
    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31

    %2 = COPY %sgpr30_sgpr31
    %1 = COPY %vgpr2_vgpr3
    %0 = COPY %vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)

    %5 = S_MOV_B32 65535
    %6 = S_MOV_B32 65535

    %10 = V_LSHRREV_B32_e64 16, %3, implicit %exec
    %11 = V_MOV_B32_e32 %10, implicit %exec
    %12 = V_LSHLREV_B32_e64 16, %11, implicit %exec
    %14 = V_FRACT_F32_e32 123, implicit %exec
    %15 = V_LSHLREV_B32_e64 16, %14, implicit %exec
    %16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
    %17 = V_SIN_F32_e32 %16, implicit %exec
    %18 = V_LSHLREV_B32_e64 16, %17, implicit %exec
    %19 = V_LSHRREV_B32_e64 16, %18, implicit %exec
    %20 = V_CVT_U32_F32_e32 %19, implicit %exec
    %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
    %23 = V_CVT_F32_I32_e32 123, implicit %exec
    %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec

    %25 = V_LSHRREV_B32_e64 16, %3, implicit %exec
    %26 = V_MOV_B32_e64 %25, implicit %exec
    %26 = V_LSHLREV_B32_e64 16, %26, implicit %exec
    %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit %exec
    %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
    %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
    %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit %exec
    %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
    %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
    %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit %exec
    %34 = V_LSHLREV_B32_e64 16, %33, implicit %exec
    %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit %exec
    %36 = V_LSHLREV_B32_e64 16, %35, implicit %exec

    %37 = V_LSHRREV_B32_e64 16, %36, implicit %exec
    %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit %exec
    %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
    %40 = V_LSHRREV_B32_e64 16, %39, implicit %exec
    %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit %exec
    %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
    %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
    %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit %exec
    %45 = V_LSHLREV_B32_e64 16, %44, implicit %exec
    %46 = V_LSHRREV_B32_e64 16, %45, implicit %exec
    %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit %exec
    %48 = V_LSHLREV_B32_e64 16, %47, implicit %exec

    %100 = V_MOV_B32_e32 %48, implicit %exec

    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
    %sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return %sgpr30_sgpr31

...
---
# GCN-LABEL: {{^}}name: vop2_instructions
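# The VOP2 cases combine V_LSHRREV/V_LSHLREV and V_BFE_U32 extracts with
# V_AND_B32, V_ADD_F32, V_SUB_F16 and the V_MAC variants. The checks expect
# the MAC cases to stay in e32/e64 form on gfx900 while VI converts them to
# SDWA.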

# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec

# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec

# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec

# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec

# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
# VI: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec

# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec

name: vop2_instructions
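# As in vop1_instructions, each operation reads halves or bytes produced by
# shifts and V_BFE_U32 so the pass can fold the extracts into SDWA source
# selects.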
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: vgpr_32 }
  - { id: 28, class: vgpr_32 }
  - { id: 29, class: vgpr_32 }
  - { id: 30, class: vgpr_32 }
  - { id: 31, class: vgpr_32 }
  - { id: 32, class: vgpr_32 }
  - { id: 33, class: vgpr_32 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
  - { id: 36, class: vgpr_32 }
  - { id: 37, class: vgpr_32 }
  - { id: 38, class: vgpr_32 }
  - { id: 39, class: vgpr_32 }
  - { id: 40, class: vgpr_32 }
  - { id: 41, class: vgpr_32 }
  - { id: 42, class: vgpr_32 }
  - { id: 43, class: vgpr_32 }
  - { id: 44, class: vgpr_32 }
  - { id: 45, class: vgpr_32 }
  - { id: 46, class: vgpr_32 }
  - { id: 47, class: vgpr_32 }
  - { id: 48, class: vgpr_32 }
  - { id: 49, class: vgpr_32 }
  - { id: 50, class: vgpr_32 }
  - { id: 51, class: vgpr_32 }
  - { id: 52, class: vgpr_32 }
  - { id: 53, class: vgpr_32 }
  - { id: 54, class: vgpr_32 }
  - { id: 55, class: vgpr_32 }
  - { id: 56, class: vgpr_32 }
  - { id: 57, class: vgpr_32 }
  - { id: 58, class: vgpr_32 }
  - { id: 59, class: vgpr_32 }
  - { id: 60, class: vgpr_32 }
  - { id: 100, class: vgpr_32 }
body: |
  bb.0:
    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31

    %2 = COPY %sgpr30_sgpr31
    %1 = COPY %vgpr2_vgpr3
    %0 = COPY %vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)

    %5 = S_MOV_B32 65535
    %6 = S_MOV_B32 65535

    %11 = V_LSHRREV_B32_e64 16, %3, implicit %exec
    %12 = V_AND_B32_e32 %6, %11, implicit %exec
    %13 = V_LSHLREV_B32_e64 16, %12, implicit %exec
    %14 = V_LSHRREV_B32_e64 16, %13, implicit %exec
    %15 = V_BFE_U32 %13, 8, 8, implicit %exec
    %16 = V_ADD_F32_e32 %14, %15, implicit %exec
    %17 = V_LSHLREV_B32_e64 16, %16, implicit %exec
    %18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
    %19 = V_BFE_U32 %17, 8, 8, implicit %exec
    %20 = V_SUB_F16_e32 %18, %19, implicit %exec
    %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
    %22 = V_BFE_U32 %20, 8, 8, implicit %exec
    %23 = V_MAC_F32_e32 %21, %22, %22, implicit %exec
    %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
    %25 = V_LSHRREV_B32_e64 16, %24, implicit %exec
    %26 = V_BFE_U32 %24, 8, 8, implicit %exec
    %27 = V_MAC_F16_e32 %25, %26, %26, implicit %exec
    %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec

    %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
    %30 = V_AND_B32_e64 23, %29, implicit %exec
    %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
    %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
    %33 = V_BFE_U32 %31, 8, 8, implicit %exec
    %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit %exec
    %35 = V_LSHLREV_B32_e64 16, %34, implicit %exec
    %37 = V_BFE_U32 %35, 8, 8, implicit %exec
    %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit %exec
    %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
    %40 = V_BFE_U32 %39, 8, 8, implicit %exec
    %41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit %exec
    %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
    %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
    %44 = V_BFE_U32 %42, 8, 8, implicit %exec
    %45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit %exec
    %46 = V_LSHLREV_B32_e64 16, %45, implicit %exec

    %47 = V_LSHRREV_B32_e64 16, %46, implicit %exec
    %48 = V_BFE_U32 %46, 8, 8, implicit %exec
    %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit %exec
    %50 = V_LSHLREV_B32_e64 16, %49, implicit %exec
    %51 = V_BFE_U32 %50, 8, 8, implicit %exec
    %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit %exec
    %53 = V_LSHLREV_B32_e64 16, %52, implicit %exec
    %54 = V_BFE_U32 %53, 8, 8, implicit %exec
    %55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit %exec
    %56 = V_LSHLREV_B32_e64 16, %55, implicit %exec
    %57 = V_LSHRREV_B32_e64 16, %56, implicit %exec
    %58 = V_BFE_U32 %56, 8, 8, implicit %exec
    %59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit %exec
    %60 = V_LSHLREV_B32_e64 16, %59, implicit %exec

    %100 = V_MOV_B32_e32 %60, implicit %exec

    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
    %sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return %sgpr30_sgpr31

...
---
# GCN-LABEL: {{^}}name: vopc_instructions
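# Compares that write %vcc are converted to SDWA forms where the operands
# allow it; cases with clamp or omod set keep their e64 form on one target or
# the other, so the VI and gfx900 expectations below differ.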

# GFX89: %{{[0-9]+}} = V_MOV_B32_e32 123, implicit %exec
# GFX89: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# GFX89: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX89: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# GFX89: %vcc = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec

# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec

# GFX9: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
# GFX9: %{{[0-9]+}} = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
# GFX9: %{{[0-9]+}} = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec

# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec

# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def %exec, implicit %exec

name: vopc_instructions
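# Every compare consumes a freshly masked value (%5 AND %3) so that the pass
# can fold the preceding V_AND_B32 into the compare's SDWA source select.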
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: sreg_64 }
  - { id: 19, class: sreg_64 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: vgpr_32 }
  - { id: 100, class: vgpr_32 }
body: |
  bb.0:
    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31

    %2 = COPY %sgpr30_sgpr31
    %1 = COPY %vgpr2_vgpr3
    %0 = COPY %vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)

    %5 = S_MOV_B32 65535
    %6 = S_MOV_B32 65535

    %10 = V_AND_B32_e64 %5, %3, implicit %exec
    V_CMP_EQ_F32_e32 123, killed %10, implicit-def %vcc, implicit %exec
    %11 = V_AND_B32_e64 %5, %3, implicit %exec
    V_CMPX_GT_F32_e32 123, killed %11, implicit-def %vcc, implicit-def %exec, implicit %exec
    %12 = V_AND_B32_e64 %5, %3, implicit %exec
    V_CMP_LT_I32_e32 123, killed %12, implicit-def %vcc, implicit %exec
    %13 = V_AND_B32_e64 %5, %3, implicit %exec
    V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec

    %14 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit %exec
    %15 = V_AND_B32_e64 %5, %3, implicit %exec
    %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, implicit-def %exec, implicit %exec
    %16 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
    %17 = V_AND_B32_e64 %5, %3, implicit %exec
    %19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec

    %20 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, implicit %exec
    %21 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, implicit-def %exec, implicit %exec
    %23 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, implicit %exec
    %24 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, implicit-def %exec, implicit %exec
    %25 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, implicit-def %exec, implicit %exec
    %26 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, implicit-def %exec, implicit %exec
    %27 = V_AND_B32_e64 %5, %3, implicit %exec
    %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, implicit-def %exec, implicit %exec

    %100 = V_MOV_B32_e32 %vcc_lo, implicit %exec

    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
    %sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return %sgpr30_sgpr31

...