forked from OSchip/llvm-project
GlobalISel: moreElementsVector for FP min/max
This commit is contained in:
parent
9e1a2a668b
commit
9fd31fdbd3
|
@ -3317,7 +3317,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
|
|||
case TargetOpcode::G_SMIN:
|
||||
case TargetOpcode::G_SMAX:
|
||||
case TargetOpcode::G_UMIN:
|
||||
case TargetOpcode::G_UMAX: {
|
||||
case TargetOpcode::G_UMAX:
|
||||
case TargetOpcode::G_FMINNUM:
|
||||
case TargetOpcode::G_FMAXNUM:
|
||||
case TargetOpcode::G_FMINNUM_IEEE:
|
||||
case TargetOpcode::G_FMAXNUM_IEEE:
|
||||
case TargetOpcode::G_FMINIMUM:
|
||||
case TargetOpcode::G_FMAXIMUM: {
|
||||
Observer.changingInstr(MI);
|
||||
moreElementsVectorSrc(MI, MoreTy, 1);
|
||||
moreElementsVectorSrc(MI, MoreTy, 2);
|
||||
|
|
|
@ -354,6 +354,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
|
||||
if (ST.hasVOP3PInsts()) {
|
||||
MinNumMaxNum.customFor(FPTypesPK16)
|
||||
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
||||
.clampMaxNumElements(0, S16, 2)
|
||||
.clampScalar(0, S16, S64)
|
||||
.scalarize(0);
|
||||
|
|
|
@ -477,36 +477,22 @@ body: |
|
|||
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
|
||||
; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
|
||||
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
|
||||
; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
|
||||
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT3]]
|
||||
; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT4]]
|
||||
; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
|
||||
; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]]
|
||||
; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
|
||||
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0
|
||||
; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMINNUM_IEEE]](<2 x s16>), 0
|
||||
; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
|
||||
; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32
|
||||
; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32
|
||||
; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT6]]
|
||||
; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT7]]
|
||||
; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
|
||||
; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT5]](<3 x s16>), 0
|
||||
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMINNUM_IEEE1]](s16), 32
|
||||
; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0
|
||||
; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>)
|
||||
; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
|
||||
; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
|
||||
; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
|
||||
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>)
|
||||
; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<3 x s16>) = G_EXTRACT %0, 0
|
||||
|
|
|
@ -477,36 +477,22 @@ body: |
|
|||
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
|
||||
; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
|
||||
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
|
||||
; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
|
||||
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT3]]
|
||||
; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT4]]
|
||||
; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
|
||||
; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]]
|
||||
; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
|
||||
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0
|
||||
; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMINNUM_IEEE]](<2 x s16>), 0
|
||||
; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
|
||||
; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32
|
||||
; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32
|
||||
; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT6]]
|
||||
; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT7]]
|
||||
; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
|
||||
; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT5]](<3 x s16>), 0
|
||||
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMINNUM_IEEE1]](s16), 32
|
||||
; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0
|
||||
; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>)
|
||||
; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
|
||||
; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
|
||||
; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
|
||||
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>)
|
||||
; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<3 x s16>) = G_EXTRACT %0, 0
|
||||
|
|
Loading…
Reference in New Issue