From 56342642a087f2cd8b11b84a123f7e5151322cdc Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 23 Apr 2019 18:20:44 +0000 Subject: [PATCH] [AArch64][GlobalISel] Legalize G_INTRINSIC_TRUNC Same patch as G_FCEIL etc. Add the missing switch case in widenScalar, add G_INTRINSIC_TRUNC to the correct rule in AArch64LegalizerInfo.cpp, and add a test. llvm-svn: 359021 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 1 + .../Target/AArch64/AArch64LegalizerInfo.cpp | 2 +- .../GlobalISel/legalize-intrinsic-trunc.mir | 203 ++++++++++++++++++ .../GlobalISel/legalizer-info-validation.mir | 2 +- 4 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 4b592b339874..9521e67a6e84 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1327,6 +1327,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: case TargetOpcode::G_FPOW: + case TargetOpcode::G_INTRINSIC_TRUNC: assert(TypeIdx == 0); Observer.changingInstr(MI); diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index d58eb51b3439..1b3020e9f7c0 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -131,7 +131,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); getActionDefinitionsBuilder( - {G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, G_FMA}) + {G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, G_FMA, G_INTRINSIC_TRUNC}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir new file mode 100644 index 000000000000..c552a55323a7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -simplify-mir -aarch64-neon-syntax=apple -mattr=-fullfp16 -o - | FileCheck %s --check-prefix=NO-FP16 +# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -simplify-mir -aarch64-neon-syntax=apple -mattr=+fullfp16 -o - | FileCheck %s --check-prefix=FP16 + +... +--- +name: test_f16.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $h0 + ; NO-FP16-LABEL: name: test_f16.intrinsic_trunc + ; NO-FP16: liveins: $h0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[COPY]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; NO-FP16: $h0 = COPY [[FPTRUNC]](s16) + ; NO-FP16: RET_ReallyLR implicit $h0 + ; FP16-LABEL: name: test_f16.intrinsic_trunc + ; FP16: liveins: $h0 + ; FP16: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $h0 = COPY [[INTRINSIC_TRUNC]](s16) + ; FP16: RET_ReallyLR implicit $h0 + %0:_(s16) = COPY $h0 + %1:_(s16) = G_INTRINSIC_TRUNC %0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... +--- +name: test_v4f16.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; NO-FP16-LABEL: name: test_v4f16.intrinsic_trunc + ; NO-FP16: liveins: $d0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] + ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]] + ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]] + ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) + ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; NO-FP16: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; NO-FP16: RET_ReallyLR implicit $d0 + ; FP16-LABEL: name: test_v4f16.intrinsic_trunc + ; FP16: liveins: $d0 + ; FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $d0 = COPY [[INTRINSIC_TRUNC]](<4 x s16>) + ; FP16: RET_ReallyLR implicit $d0 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = G_INTRINSIC_TRUNC %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v8f16.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v8f16.intrinsic_trunc + ; NO-FP16: liveins: $q0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] + ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]] + ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]] + ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) + ; NO-FP16: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC4:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT4]] + ; NO-FP16: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC4]](s32) + ; NO-FP16: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC5:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT5]] + ; NO-FP16: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC5]](s32) + ; NO-FP16: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC6:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT6]] + ; NO-FP16: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC6]](s32) + ; NO-FP16: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) + ; NO-FP16: [[INTRINSIC_TRUNC7:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; NO-FP16: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC7]](s32) + ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) + ; NO-FP16: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; NO-FP16: RET_ReallyLR implicit $q0 + ; FP16-LABEL: name: test_v8f16.intrinsic_trunc + ; FP16: liveins: $q0 + ; FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $q0 = COPY [[INTRINSIC_TRUNC]](<8 x s16>) + ; FP16: RET_ReallyLR implicit $q0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s16>) = G_INTRINSIC_TRUNC %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f32.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; NO-FP16-LABEL: name: test_v2f32.intrinsic_trunc + ; NO-FP16: liveins: $d0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_TRUNC [[COPY]] + ; NO-FP16: $d0 = COPY [[INTRINSIC_TRUNC]](<2 x s32>) + ; NO-FP16: RET_ReallyLR implicit $d0 + ; FP16-LABEL: name: test_v2f32.intrinsic_trunc + ; FP16: liveins: $d0 + ; FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $d0 = COPY [[INTRINSIC_TRUNC]](<2 x s32>) + ; FP16: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = G_INTRINSIC_TRUNC %0 + $d0 = COPY %1(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v4f32.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v4f32.intrinsic_trunc + ; NO-FP16: liveins: $q0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[COPY]] + ; NO-FP16: $q0 = COPY [[INTRINSIC_TRUNC]](<4 x s32>) + ; NO-FP16: RET_ReallyLR implicit $q0 + ; FP16-LABEL: name: test_v4f32.intrinsic_trunc + ; FP16: liveins: $q0 + ; FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $q0 = COPY [[INTRINSIC_TRUNC]](<4 x s32>) + ; FP16: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = G_INTRINSIC_TRUNC %0 + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f64.intrinsic_trunc +alignment: 2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v2f64.intrinsic_trunc + ; NO-FP16: liveins: $q0 + ; NO-FP16: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; NO-FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_TRUNC [[COPY]] + ; NO-FP16: $q0 = COPY [[INTRINSIC_TRUNC]](<2 x s64>) + ; NO-FP16: RET_ReallyLR implicit $q0 + ; FP16-LABEL: name: test_v2f64.intrinsic_trunc + ; FP16: liveins: $q0 + ; FP16: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; FP16: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_TRUNC [[COPY]] + ; FP16: $q0 = COPY [[INTRINSIC_TRUNC]](<2 x s64>) + ; FP16: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = G_INTRINSIC_TRUNC %0 + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index e4a6f3ca4337..f149a6c45278 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -88,7 +88,7 @@ # DEBUG: .. the first uncovered type index: 2, OK # # DEBUG-NEXT: G_INTRINSIC_TRUNC (opcode {{[0-9]+}}): 1 type index -# DEBUG: .. type index coverage check SKIPPED: no rules defined +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected # # DEBUG-NEXT: G_INTRINSIC_ROUND (opcode {{[0-9]+}}): 1 type index # DEBUG: .. type index coverage check SKIPPED: no rules defined