From 9fd31fdbd3049e3e45fc046bedb9011d0c828e87 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 27 Jul 2019 17:47:08 -0400 Subject: [PATCH] GlobalISel: moreElementsVector for FP min/max --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 8 +++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1 + .../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 42 +++++++------------ .../AMDGPU/GlobalISel/legalize-fminnum.mir | 42 +++++++------------ 4 files changed, 36 insertions(+), 57 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6a56a4e3c65b..d64c061071ae 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3317,7 +3317,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: { + case TargetOpcode::G_UMAX: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: { Observer.changingInstr(MI); moreElementsVectorSrc(MI, MoreTy, 1); moreElementsVectorSrc(MI, MoreTy, 2); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 63db7a379ebf..99a7d893c931 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -354,6 +354,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasVOP3PInsts()) { MinNumMaxNum.customFor(FPTypesPK16) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampMaxNumElements(0, S16, 2) .clampScalar(0, S16, S64) .scalarize(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 08c9b357ac07..7fa8c441fa48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -477,36 +477,22 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT3]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT4]] + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMINNUM_IEEE]](<2 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 - ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT6]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT7]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT5]](<3 x s16>), 0 - ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMINNUM_IEEE1]](s16), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 - ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0 - ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) + ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index 08c9b357ac07..7fa8c441fa48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -477,36 +477,22 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT3]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT4]] + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMINNUM_IEEE]](<2 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 - ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT6]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT7]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT5]](<3 x s16>), 0 - ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMINNUM_IEEE1]](s16), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 - ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0 - ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) + ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0