forked from OSchip/llvm-project
473 lines
18 KiB
Plaintext
473 lines
18 KiB
Plaintext
|
# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=gcn-dpp-combine -o - %s | FileCheck %s
|
||
|
|
||
|
---
|
||
|
# old is undefined: only combine when masks are fully enabled and
|
||
|
# bound_ctrl:0 is set, otherwise the result of DPP VALU op can be undefined.
|
||
|
# CHECK-LABEL: name: old_is_undef
|
||
|
# CHECK: %2:vgpr_32 = IMPLICIT_DEF
|
||
|
# VOP2:
|
||
|
# CHECK: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
|
||
|
# CHECK: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
|
||
|
# CHECK: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
|
||
|
# VOP1:
|
||
|
# CHECK: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
# CHECK: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
|
||
|
# CHECK: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
|
||
|
# CHECK: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
|
||
|
# Input for the "old is undefined" scenario: %2 is an IMPLICIT_DEF and is the
# first (old) operand of every V_MOV_B32_dpp below.
# There are eight mov+consumer pairs: four feed V_ADD_U32_e32 (VOP2) and four
# feed V_NOT_B32_e32 (VOP1). Inside each group the trailing DPP operands run
# through 15,15,1 / 15,15,0 / 14,15,1 / 14,15,0, i.e. fully enabled vs.
# partially disabled masks crossed with bound_ctrl on vs. off.
# Per the expected output listed above, only the first pair of each group
# (%4 and %12) is folded into a *_dpp instruction; the other six stay as e32.
name: old_is_undef
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = IMPLICIT_DEF
|
||
|
|
||
|
; VOP2
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
|
||
|
|
||
|
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
|
||
|
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
|
||
|
|
||
|
; VOP1
|
||
|
%11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
|
||
|
|
||
|
%13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
|
||
|
|
||
|
%15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
|
||
|
|
||
|
%17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old is zero cases:
|
||
|
|
||
|
# CHECK-LABEL: name: old_is_0
|
||
|
|
||
|
# VOP2:
|
||
|
# case 1: old is zero, masks are fully enabled, bound_ctrl:0 is on:
|
||
|
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
|
||
|
# out of range}) or active src lane result - can combine with old = undef.
|
||
|
# undef is preferred as it makes life easier for the regalloc.
|
||
|
# CHECK: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||
|
# CHECK: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec
|
||
|
|
||
|
# case 2: old is zero, masks are fully enabled, bound_ctrl:0 is off:
|
||
|
# as the DPP mov old is zero this case is no different from case 1 - combine it
|
||
|
# setting bound_ctrl0 on for the combined DPP VALU op to make old undefined
|
||
|
# CHECK: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||
|
# CHECK: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec
|
||
|
|
||
|
# case 3: masks are partially disabled, bound_ctrl:0 is on:
|
||
|
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
|
||
|
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
|
||
|
# active src lane result - can combine with old = src1 of the VALU op.
|
||
|
# The VALU op should have the same masks as DPP mov as they select lanes
|
||
|
# with identity value.
|
||
|
# Special case: the bound_ctrl for the combined DPP VALU op isn't important
|
||
|
# here but let's make it off to keep the combiner's logic simpler.
|
||
|
# CHECK: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
|
||
|
# case 4: masks are partially disabled, bound_ctrl:0 is off:
|
||
|
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
|
||
|
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
|
||
|
# active src lane result - can combine with old = src1 of the VALU op.
|
||
|
# The VALU op should have the same masks as DPP mov as they select
|
||
|
# lanes with identity value
|
||
|
# CHECK: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
|
||
|
# VOP1:
|
||
|
# see case 1
|
||
|
# CHECK: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||
|
# CHECK: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec
|
||
|
# see case 2
|
||
|
# CHECK: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||
|
# CHECK: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec
|
||
|
# cases 3 and 4 are not applicable as there is no way to specify unchanged result
|
||
|
# for the unary VALU op
|
||
|
# CHECK: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
|
||
|
# CHECK: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
|
||
|
|
||
|
# Input for the "old is zero" scenario: %2 is V_MOV_B32_e32 0 and is the first
# (old) operand of every V_MOV_B32_dpp below.
# The four VOP2 pairs (V_ADD_U32_e32 consumers) appear in the order of cases
# 1-4 described above: trailing DPP operands 15,15,1 / 15,15,0 / 14,15,1 /
# 14,15,0. The four VOP1 pairs (V_NOT_B32_e32 consumers) repeat the same
# operand combinations.
# Per the expected output listed above, all four VOP2 pairs and the first two
# VOP1 pairs are folded; %16 and %18 stay as V_NOT_B32_e32.
name: old_is_0
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
|
||
|
; VOP2
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
|
||
|
|
||
|
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
|
||
|
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
|
||
|
|
||
|
; VOP1
|
||
|
%11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
|
||
|
|
||
|
%13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
|
||
|
|
||
|
%15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
|
||
|
|
||
|
%17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old is nonzero identity cases:
|
||
|
|
||
|
# old is nonzero identity, masks are fully enabled, bound_ctrl:0 is off:
|
||
|
# the DPP mov result would be either identity ({src lane disabled}|{out of
|
||
|
# range}) or src lane result - can combine with old = src1 of the VALU op
|
||
|
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
|
||
|
# select lanes with identity value
|
||
|
|
||
|
# CHECK-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off
|
||
|
# CHECK: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
|
||
|
# CHECK: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
|
||
|
# CHECK: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
|
||
|
|
||
|
# Four constant / DPP mov / consumer triples. Each constant is the old operand
# of a V_MOV_B32_dpp whose trailing operands are 15,15,0 (masks fully enabled,
# bound_ctrl off), and the mov result is src0 of the consumer:
#   1           -> V_MUL_U32_U24_e32
#   4294967295  -> V_AND_B32_e32
#   -2147483648 -> V_MAX_I32_e32
#   2147483647  -> V_MIN_I32_e32
# Per the expected output listed above, every consumer is folded into its
# *_dpp form with %1 as the old operand.
name: nonzero_old_is_identity_masks_enabled_bctl_off
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
|
||
|
%6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
|
||
|
|
||
|
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
|
||
|
|
||
|
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||
|
%12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
|
||
|
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old is nonzero identity, masks are partially enabled, bound_ctrl:0 is off:
|
||
|
# the DPP mov result would be either identity ({src lane disabled}|{src lane is
|
||
|
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
|
||
|
# active src lane result - can combine with old = src1 of the VALU op.
|
||
|
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
|
||
|
# select lanes with identity value
|
||
|
|
||
|
# CHECK-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
|
||
|
# CHECK: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
# CHECK: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
# CHECK: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
|
||
|
|
||
|
# Four constant / DPP mov / consumer triples with bound_ctrl off (last DPP
# operand 0) and one of the two mask operands reduced from 15 to 14; the
# reduced mask alternates between the first (14,15) and second (15,14) mask
# operand from triple to triple.
# Constants and consumers: 1 -> V_MUL_U32_U24_e32, 4294967295 -> V_AND_B32_e32,
# -2147483648 -> V_MAX_I32_e32, 2147483647 -> V_MIN_I32_e32.
# Per the expected output listed above, every consumer is folded into its
# *_dpp form, keeping the mov's mask values and using %1 as the old operand.
name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
|
||
|
%6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
|
||
|
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
|
||
|
|
||
|
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
|
||
|
|
||
|
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||
|
%12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
|
||
|
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old is nonzero identity, masks are partially enabled, bound_ctrl:0 is on:
|
||
|
# the DPP mov result may have 3 different values:
|
||
|
# 1. the active src lane result
|
||
|
# 2. 0 if the src lane is disabled|out of range
|
||
|
# 3. DPP mov's old value if the mov's dest VGPR write is disabled by masks
|
||
|
# can't combine
|
||
|
|
||
|
# CHECK-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0
|
||
|
# CHECK: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
|
||
|
# CHECK: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
|
||
|
# CHECK: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
|
||
|
|
||
|
# Four constant / DPP mov / consumer triples with bound_ctrl on (last DPP
# operand 1) and one of the two mask operands reduced from 15 to 14
# (alternating 14,15 and 15,14).
# Constants and consumers: 1 -> V_MUL_U32_U24_e32, 4294967295 -> V_AND_B32_e32,
# -2147483648 -> V_MAX_I32_e32, 2147483647 -> V_MIN_I32_e32.
# Per the expected output listed above, none of the consumers is folded: all
# four remain e32 instructions reading the DPP mov result.
name: nonzero_old_is_identity_masks_partially_disabled_bctl0
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
|
||
|
%6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
|
||
|
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
|
||
|
|
||
|
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
|
||
|
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
|
||
|
|
||
|
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||
|
%12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
|
||
|
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# when the DPP source isn't a src0 operand the operation should be commuted if possible
|
||
|
# CHECK-LABEL: name: dpp_commute
|
||
|
# CHECK: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
# CHECK: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
# CHECK: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
|
||
|
# CHECK: %16:vgpr_32 = V_SUBREV_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
|
||
|
# CHECK: %19:vgpr_32 = V_ADD_I32_e32 5, %18, implicit-def $vcc, implicit $exec
|
||
|
# Every consumer below takes the DPP mov result as its second source operand
# (e.g. `V_MUL_U32_U24_e32 %1, %3`), so the fold is only possible after the
# operands are swapped.
# Per the expected output listed above:
#   - the MUL/AND/MAX/MIN consumers become *_dpp with the operands swapped;
#   - V_SUB_I32_e32 %1, %15 becomes V_SUBREV_I32_dpp;
#   - V_ADD_I32_e32 5, %18 is left untouched (its first source is the
#     immediate 5).
name: dpp_commute
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
|
||
|
|
||
|
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
|
||
|
%6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
|
||
|
%7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
|
||
|
|
||
|
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
|
||
|
|
||
|
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||
|
%12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
|
||
|
%13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
|
||
|
|
||
|
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
%15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%16:vgpr_32 = V_SUB_I32_e32 %1, %15, implicit-def $vcc, implicit $exec
|
||
|
|
||
|
; this cannot be combined because immediate as src0 isn't commutable
|
||
|
%17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
%18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%19:vgpr_32 = V_ADD_I32_e32 5, %18, implicit-def $vcc, implicit $exec
|
||
|
...
|
||
|
|
||
|
# check for floating point modifiers
|
||
|
# CHECK-LABEL: name: add_f32_e64
|
||
|
# CHECK: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
|
||
|
# CHECK: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec
|
||
|
# CHECK: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $exec
|
||
|
# CHECK: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
# CHECK: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec
|
||
|
|
||
|
# Four DPP mov + V_ADD_F32_e64 pairs. Every mov is identical (old written as
# `undef %2`, trailing DPP operands 15,15,1); only the immediate operands of
# the V_ADD_F32_e64 consumer differ from pair to pair.
# NOTE(review): the e64 immediates appear to be, in order, src0 modifiers,
# src1 modifiers, clamp and omod -- inferred from the in-body comments,
# confirm against the instruction definition.
# Per the expected output listed above, the second and third pairs (%6, %8)
# are folded into V_ADD_F32_dpp; the first and fourth (%4, %10) are not.
name: add_f32_e64
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = IMPLICIT_DEF
|
||
|
|
||
|
; this shouldn't be combined as omod is set
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec
|
||
|
|
||
|
; this should be combined as all modifiers are default
|
||
|
%5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
|
||
|
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $exec
|
||
|
|
||
|
; this should be combined as modifiers other than abs|neg are default
|
||
|
%7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
|
||
|
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $exec
|
||
|
|
||
|
; this shouldn't be combined as modifiers aren't abs|neg
|
||
|
%9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
|
||
|
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec
|
||
|
...
|
||
|
|
||
|
# tests on sequences of dpp consumers
|
||
|
# CHECK-LABEL: name: dpp_seq
|
||
|
# CHECK: %4:vgpr_32 = V_ADD_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
|
||
|
# CHECK: %5:vgpr_32 = V_SUBREV_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
|
||
|
# CHECK: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
|
||
|
# broken sequence:
|
||
|
# CHECK: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
|
||
|
# Two DPP movs, each with more than one consumer.
#   %3 feeds three instructions (%4 ADD, %5 SUB with %3 as the second source,
#      %6 OR); per the expected output listed above all three are folded,
#      the SUB turning into V_SUBREV_I32_dpp.
#   %7 feeds two instructions (%8 ADD and %9 SUB); %9 has the immediate 5 as
#      its first source, and the expected output keeps the %7 mov in place.
name: dpp_seq
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_I32_e32 %3, %1, implicit-def $vcc, implicit $exec
|
||
|
%5:vgpr_32 = V_SUB_I32_e32 %1, %3, implicit-def $vcc, implicit $exec
|
||
|
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
|
||
|
|
||
|
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
|
||
|
%8:vgpr_32 = V_ADD_I32_e32 %7, %1, implicit-def $vcc, implicit $exec
|
||
|
; this breaks the sequence
|
||
|
%9:vgpr_32 = V_SUB_I32_e32 5, %7, implicit-def $vcc, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old reg def is in diff BB - cannot combine
|
||
|
# CHECK-LABEL: name: old_in_diff_bb
|
||
|
# CHECK: %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
|
||
|
|
||
|
# Two-block function: the old value %2 (V_MOV_B32_e32 0) is defined in bb.0,
# while the V_MOV_B32_dpp that reads it and the V_ADD_U32_e32 consumer sit in
# bb.1. The mov's trailing DPP operands are 1, 1, 0 (bound_ctrl off).
# Per the expected output listed above, the %3 mov is left in place.
name: old_in_diff_bb
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
successors: %bb.1
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
S_BRANCH %bb.1
|
||
|
|
||
|
bb.1:
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
|
||
|
...
|
||
|
|
||
|
# old reg def is in diff BB but bound_ctrl:0 - can combine
# The old value %2 is defined in bb.0; the DPP mov (trailing operands 15,15,1:
# masks fully enabled, bound_ctrl on) and its V_ADD_U32_e32 consumer are in
# bb.1. The old operand of the folded instruction is a register created by the
# pass, so it is matched with a regex that accepts a virtual register number
# of any width instead of exactly one digit.
|
||
|
# CHECK-LABEL: name: old_in_diff_bb_bctrl_zero
|
||
|
# CHECK: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]+}}, %0, %1, 1, 15, 15, 1, implicit $exec
|
||
|
|
||
|
name: old_in_diff_bb_bctrl_zero
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
successors: %bb.1
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
S_BRANCH %bb.1
|
||
|
|
||
|
bb.1:
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# EXEC mask changed between def and use - cannot combine
|
||
|
# CHECK-LABEL: name: exec_changed
|
||
|
# CHECK: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
|
||
|
# Single-block function where the DPP mov result %3 has two V_ADD_U32_e32
# users (%4 and %6) and an instruction that redefines $exec
# (`%5:sreg_64 = COPY $exec, implicit-def $exec`) sits between them.
# Per the expected output listed above, the %3 mov is left in place.
name: exec_changed
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vgpr_32 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
|
||
|
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
|
||
|
%5:sreg_64 = COPY $exec, implicit-def $exec
|
||
|
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# test if $old definition is correctly tracked through subreg manipulation pseudos
|
||
|
|
||
|
# CHECK-LABEL: name: mul_old_subreg
|
||
|
# CHECK: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
|
||
|
|
||
|
# The DPP mov's old operand is %5.sub0, reached through two subregister
# pseudos: %4 = REG_SEQUENCE(%2 -> sub0, %3 -> sub1) and
# %5 = INSERT_SUBREG %4, %1, sub1. INSERT_SUBREG only replaces sub1, so
# %5.sub0 still carries %2, which is the constant 1.
# The consumer is V_MUL_I32_I24_e32 with %0.sub1 as its other source.
# Per the expected output listed above, the pair is folded into
# V_MUL_I32_I24_dpp with %0.sub1 as the old operand.
name: mul_old_subreg
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vreg_64 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||
|
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
|
||
|
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
|
||
|
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
|
||
|
%6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
|
||
|
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# CHECK-LABEL: name: add_old_subreg
|
||
|
# CHECK: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
|
||
|
|
||
|
# The DPP mov's old operand is %3.sub1, where %3 = INSERT_SUBREG %0, %2, sub1
# and %2 is the constant 0, so the old value is the inserted zero.
# The consumer is V_ADD_U32_e32 with %0.sub1 as its other source.
# Per the expected output listed above, the pair is folded into V_ADD_U32_dpp
# with %0.sub1 as the old operand and the mov's 1, 1, 0 trailing operands.
name: add_old_subreg
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vreg_64 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
|
||
|
%4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
|
||
|
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
|
||
|
...
|
||
|
|
||
|
# CHECK-LABEL: name: add_old_subreg_undef
|
||
|
# CHECK: %5:vgpr_32 = V_ADD_U32_dpp %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec
|
||
|
|
||
|
# The DPP mov's old operand is %3.sub1, but %3 = REG_SEQUENCE only supplies
# sub0 (from %2), so sub1 is never defined. The mov uses trailing DPP operands
# 15,15,1 (masks fully enabled, bound_ctrl on).
# Per the expected output listed above, the pair is folded into V_ADD_U32_dpp
# and the old operand stays %3.sub1.
name: add_old_subreg_undef
|
||
|
tracksRegLiveness: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $vgpr0, $vgpr1
|
||
|
|
||
|
%0:vreg_64 = COPY $vgpr0
|
||
|
%1:vgpr_32 = COPY $vgpr1
|
||
|
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||
|
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
|
||
|
%4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
|
||
|
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
|