From e6cebd0d69ec29aa41f560d3d20a3da452146697 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 25 Jan 2019 04:37:33 +0000 Subject: [PATCH] GlobalISel: fewerElementsVector for more cast types llvm-svn: 352166 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 5 +++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 ++-- .../AMDGPU/GlobalISel/legalize-fptosi.mir | 42 +++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-fptoui.mir | 35 ++++++++++++++++ .../AMDGPU/GlobalISel/legalize-fptrunc.mir | 39 +++++++++++++++++ .../AMDGPU/GlobalISel/legalize-sitofp.mir | 35 ++++++++++++++++ 6 files changed, 162 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 5be14018fb9f..d616d7311f82 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1511,6 +1511,11 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SEXT: case TargetOpcode::G_ANYEXT: case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5ad9a43bb960..e3f34404e3d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -148,7 +148,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .clampScalar(0, S32, S64); getActionDefinitionsBuilder(G_FPTRUNC) - .legalFor({{S32, S64}, {S16, S32}}); + .legalFor({{S32, S64}, {S16, S32}}) + .scalarize(0); getActionDefinitionsBuilder(G_FPEXT) .legalFor({{S64, S32}, {S32, S16}}) @@ -171,10 +172,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .scalarize(0); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalFor({{S32, S32}, {S64, S32}}); + .legalFor({{S32, S32}, {S64, S32}}) + .scalarize(0); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) - .legalFor({{S32, S32}, {S32, S64}}); + .legalFor({{S32, S32}, {S32, S64}}) + .scalarize(0); getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND}) .legalFor({S32, S64}); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir index 60c2e2aecaaf..2da5ffd1d034 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir @@ -9,8 +9,11 @@ body: | ; CHECK-LABEL: name: test_fptosi_s32_to_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[FPTOSI]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FPTOSI %0 + $vgpr0 = COPY %1 ... --- @@ -21,6 +24,45 @@ body: | ; CHECK-LABEL: name: test_fptosi_s64_to_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[FPTOSI]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_FPTOSI %0 + $vgpr0 = COPY %1 +... + +--- +name: test_fptosi_v2s32_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_fptosi_v2s32_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s32) + ; CHECK: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_FPTOSI %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_fptosi_v2s64_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_fptosi_v2s64_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s64) + ; CHECK: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_FPTOSI %0 + $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir index c77b8ed21514..f82c72bd2332 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir @@ -31,3 +31,38 @@ body: | $vgpr0 = COPY %1 ... +--- +name: test_fptoui_v2s32_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_fptoui_v2s32_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s32) + ; CHECK: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_FPTOUI %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_fptoui_v2s64_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_fptoui_v2s64_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s64) + ; CHECK: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_FPTOUI %0 + $vgpr0_vgpr1 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir index f2ed866424e4..5a66e8a6c2e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir @@ -32,3 +32,42 @@ body: | %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... + +--- +name: test_fptrunc_v2s64_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_fptrunc_v2s64_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV]](s64) + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTRUNC]](s32), [[FPTRUNC1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_FPTRUNC %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_fptrunc_v2s32_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_fptrunc_v2s32_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV1]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_FPTRUNC %0 + %2:_(<2 x s32>) = G_ANYEXT %1 + $vgpr0_vgpr1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir index aa86c5f02533..6a45e73a18c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -31,3 +31,38 @@ body: | $vgpr0_vgpr1 = COPY %1 ... +--- +name: test_sitofp_v2s32_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) + ; CHECK: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_SITOFP %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sitofp_v2s32_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) + ; CHECK: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SITOFP %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +...