2019-12-13 00:35:16 +08:00
|
|
|
//===- LegalizerTest.cpp --------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "GISelMITest.h"
|
|
|
|
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
|
|
|
|
|
|
|
|
using namespace LegalizeActions;
|
|
|
|
using namespace LegalizeMutations;
|
|
|
|
using namespace LegalityPredicates;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// GoogleTest predicate that succeeds only when \p MI is null.
///
/// The tests in this file pass the legalizer result's `FailedOn` pointer
/// here. A null pointer yields AssertionSuccess; a non-null pointer yields an
/// AssertionFailure whose message contains the printed instruction.
::testing::AssertionResult isNullMIPtr(const MachineInstr *MI) {
  if (!MI)
    return ::testing::AssertionSuccess();

  // Render the instruction into a string so it can be attached to the
  // failure message.
  std::string Printed;
  raw_string_ostream OS(Printed);
  MI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
            /*SkipDebugLoc=*/false, /*AddNewLine=*/false);

  ::testing::AssertionResult Failure = ::testing::AssertionFailure();
  Failure << "unable to legalize instruction: " << OS.str();
  return Failure;
}
|
|
|
|
|
[Legalizer] Making artifact combining order-independent
Legalization algorithm is complicated by two facts:
1) While regular instructions should be possible to legalize in
an isolated, per-instruction, context-free manner, legalization
artifacts can only be eliminated in pairs, which could be deeply, and
ultimately arbitrarily, nested: { [ () ] }, where each parenthesis kind
depicts an artifact kind, like extend, unmerge, etc. Such structure
can only be fully eliminated by simple local combines if they are
attempted in a particular order (inside out), or alternatively by
repeated scans each eliminating only one innermost pair, resulting in
O(n^2) complexity.
2) Some artifacts might in fact be regular instructions that could (and
sometimes should) be legalized by the target-specific rules. Which
means failure to eliminate all artifacts on the first iteration is
not a failure, they need to be tried as instructions, which may
produce more artifacts, including the ones that are in fact regular
instructions, resulting in a non-constant number of iterations
required to finish the process.
I trust the recently introduced termination condition (no new artifacts
were created during as-a-regular-instruction-retrial of artifacts not
eliminated on the previous iteration) to be efficient in providing
termination, but only performing the legalization in full if and only if
at each step such chains of artifacts are successfully eliminated in
full as well.
Which is currently not guaranteed, as the artifact combines are applied
only once and in an arbitrary order that has to do with the order of
creation or insertion of artifacts into their worklist, which is in no
particular order.
In this patch I make a small change to the artifact combiner, making it
re-insert into the worklist the immediate (modulo looking through copies)
artifact users of each vreg that changes its definition due to an
artifact combine.
Here the first scan through the artifacts worklist, while not
being done in any guaranteed order, only needs to find the innermost
pair(s) of artifacts that could be immediately combined out. After that
the process follows def-use chains, making them shorter at each step, thus
combining everything that can be combined in O(n) time.
Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders
Reviewed By: aditya_nandakumar, paquette
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71448
2019-12-13 05:20:41 +08:00
|
|
|
// Legalization rules shared by the tests in this file; the tests instantiate
// them as `ALegalizerInfo`. The scalar types s16/s32/s64 are not declared in
// this block -- presumably they are provided by the DefineLegalizerInfo macro.
DefineLegalizerInfo(ALegalizer, {
  const LLT p0 = LLT::pointer(0, 64);
  const LLT v2s8 = LLT::vector(2, 8);
  const LLT v2s16 = LLT::vector(2, 16);

  // Loads: only the {s16, p0, 8, 8} type/memory combination is legal.
  // Anything else is scalarized on type index 0 and then clamped to s16.
  // NOTE(review): the two trailing 8s look like memory size and alignment in
  // bits -- confirm against legalForTypesWithMemDesc.
  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s16, p0, 8, 8}})
      .scalarize(0)
      .clampScalar(0, s16, s16);

  // Pointer arithmetic and constants.
  getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}});
  getActionDefinitionsBuilder(G_CONSTANT).legalFor({s32, s64});

  // Vector construction from s16 elements.
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v2s16, s16}})
      .clampScalar(1, s16, s16);
  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).legalFor({{v2s8, s16}});

  // Extensions are legal only from s16 to s32.
  getActionDefinitionsBuilder(G_ANYEXT).legalFor({{s32, s16}});
  getActionDefinitionsBuilder(G_ZEXT).legalFor({{s32, s16}});
  getActionDefinitionsBuilder(G_SEXT).legalFor({{s32, s16}});

  // 32-bit AND and shifts are legal; G_SEXT_INREG is always lowered.
  getActionDefinitionsBuilder(G_AND).legalFor({s32});
  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
  getActionDefinitionsBuilder(G_ASHR).legalFor({{s32, s32}});
  getActionDefinitionsBuilder(G_SHL).legalFor({{s32, s32}});
})
|
[Legalizer] Making artifact combining order-independent
Legalization algorithm is complicated by two facts:
1) While regular instructions should be possible to legalize in
an isolated, per-instruction, context-free manner, legalization
artifacts can only be eliminated in pairs, which could be deeply, and
ultimately arbitrarily, nested: { [ () ] }, where each parenthesis kind
depicts an artifact kind, like extend, unmerge, etc. Such structure
can only be fully eliminated by simple local combines if they are
attempted in a particular order (inside out), or alternatively by
repeated scans each eliminating only one innermost pair, resulting in
O(n^2) complexity.
2) Some artifacts might in fact be regular instructions that could (and
sometimes should) be legalized by the target-specific rules. Which
means failure to eliminate all artifacts on the first iteration is
not a failure, they need to be tried as instructions, which may
produce more artifacts, including the ones that are in fact regular
instructions, resulting in a non-constant number of iterations
required to finish the process.
I trust the recently introduced termination condition (no new artifacts
were created during as-a-regular-instruction-retrial of artifacts not
eliminated on the previous iteration) to be efficient in providing
termination, but only performing the legalization in full if and only if
at each step such chains of artifacts are successfully eliminated in
full as well.
Which is currently not guaranteed, as the artifact combines are applied
only once and in an arbitrary order that has to do with the order of
creation or insertion of artifacts into their worklist, which is in no
particular order.
In this patch I make a small change to the artifact combiner, making it
re-insert into the worklist the immediate (modulo looking through copies)
artifact users of each vreg that changes its definition due to an
artifact combine.
Here the first scan through the artifacts worklist, while not
being done in any guaranteed order, only needs to find the innermost
pair(s) of artifacts that could be immediately combined out. After that
the process follows def-use chains, making them shorter at each step, thus
combining everything that can be combined in O(n) time.
Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders
Reviewed By: aditya_nandakumar, paquette
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71448
2019-12-13 05:20:41 +08:00
|
|
|
|
2020-03-23 06:12:25 +08:00
|
|
|
// Runs the legalizer with ALegalizerInfo over a single <2 x s8> load and
// checks that it reports success, reports a change, and produces two s16
// loads recombined through G_BUILD_VECTOR_TRUNC.
TEST_F(AArch64GISelMITest, BasicLegalizerTest) {
  StringRef InputMIR = R"(
    %vptr:_(p0) = COPY $x4
    %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1)
    $h4 = COPY %v:_(<2 x s8>)
  )";
  // Drop the spaces that precede the closing delimiter of the raw string.
  setUp(InputMIR.rtrim(' '));
  // Nothing to test when the fixture did not produce a target machine.
  if (!TM)
    return;

  ALegalizerInfo Rules(MF->getSubtarget());

  const Legalizer::MFResult Result =
      Legalizer::legalizeMachineFunction(*MF, Rules, {}, B);

  // No instruction may be left behind as impossible to legalize, and the
  // function must actually have been modified.
  EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
  EXPECT_TRUE(Result.Changed);

  StringRef CheckString = R"(
    CHECK: %vptr:_(p0) = COPY $x4
    CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1)
    CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64)
    CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load 1 + 1)
    CHECK-NEXT: [[V0:%[0-9]+]]:_(s16) = COPY [[LOAD_0]]:_(s16)
    CHECK-NEXT: [[V1:%[0-9]+]]:_(s16) = COPY [[LOAD_1]]:_(s16)
    CHECK-NEXT: %v:_(<2 x s8>) = G_BUILD_VECTOR_TRUNC [[V0]]:_(s16), [[V1]]:_(s16)
    CHECK-NEXT: $h4 = COPY %v:_(<2 x s8>)
  )";

  // Dump the whole function on mismatch to make the failure diagnosable.
  EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
}
|
|
|
|
|
[Legalizer] Making artifact combining order-independent
Legalization algorithm is complicated by two facts:
1) While regular instructions should be possible to legalize in
an isolated, per-instruction, context-free manner, legalization
artifacts can only be eliminated in pairs, which could be deeply, and
ultimately arbitrarily, nested: { [ () ] }, where each parenthesis kind
depicts an artifact kind, like extend, unmerge, etc. Such structure
can only be fully eliminated by simple local combines if they are
attempted in a particular order (inside out), or alternatively by
repeated scans each eliminating only one innermost pair, resulting in
O(n^2) complexity.
2) Some artifacts might in fact be regular instructions that could (and
sometimes should) be legalized by the target-specific rules. Which
means failure to eliminate all artifacts on the first iteration is
not a failure, they need to be tried as instructions, which may
produce more artifacts, including the ones that are in fact regular
instructions, resulting in a non-constant number of iterations
required to finish the process.
I trust the recently introduced termination condition (no new artifacts
were created during as-a-regular-instruction-retrial of artifacts not
eliminated on the previous iteration) to be efficient in providing
termination, but only performing the legalization in full if and only if
at each step such chains of artifacts are successfully eliminated in
full as well.
Which is currently not guaranteed, as the artifact combines are applied
only once and in an arbitrary order that has to do with the order of
creation or insertion of artifacts into their worklist, which is in no
particular order.
In this patch I make a small change to the artifact combiner, making it
re-insert into the worklist the immediate (modulo looking through copies)
artifact users of each vreg that changes its definition due to an
artifact combine.
Here the first scan through the artifacts worklist, while not
being done in any guaranteed order, only needs to find the innermost
pair(s) of artifacts that could be immediately combined out. After that
the process follows def-use chains, making them shorter at each step, thus
combining everything that can be combined in O(n) time.
Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders
Reviewed By: aditya_nandakumar, paquette
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71448
2019-12-13 05:20:41 +08:00
|
|
|
// Making sure the legalization finishes successfully w/o failure to combine
|
|
|
|
// away all the legalization artifacts regardless of the order of their
|
|
|
|
// creation.
|
2020-03-23 06:12:25 +08:00
|
|
|
TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningTest) {
|
[Legalizer] Making artifact combining order-independent
Legalization algorithm is complicated by two facts:
1) While regular instructions should be possible to legalize in
an isolated, per-instruction, context-free manner, legalization
artifacts can only be eliminated in pairs, which could be deeply, and
ultimately arbitrary nested: { [ () ] }, where which paranthesis kind
depicts an artifact kind, like extend, unmerge, etc. Such structure
can only be fully eliminated by simple local combines if they are
attempted in a particular order (inside out), or alternatively by
repeated scans each eliminating only one innermost pair, resulting in
O(n^2) complexity.
2) Some artifacts might in fact be regular instructions that could (and
sometimes should) be legalized by the target-specific rules. Which
means failure to eliminate all artifacts on the first iteration is
not a failure, they need to be tried as instructions, which may
produce more artifacts, including the ones that are in fact regular
instructions, resulting in a non-constant number of iterations
required to finish the process.
I trust the recently introduced termination condition (no new artifacts
were created during as-a-regular-instruction-retrial of artifacts not
eliminated on the previous iteration) to be efficient in providing
termination, but only performing the legalization in full if and only if
at each step such chains of artifacts are successfully eliminated in
full as well.
Which is currently not guaranteed, as the artifact combines are applied
only once and in an arbitrary order that has to do with the order of
creation or insertion of artifacts into their worklist, which is a no
particular order.
In this patch I make a small change to the artifact combiner, making it
to re-insert into the worklist immediate (modulo a look-through copies)
artifact users of each vreg that changes its definition due to an
artifact combine.
Here the first scan through the artifacts worklist, while not
being done in any guaranteed order, only needs to find the innermost
pair(s) of artifacts that could be immediately combined out. After that
the process follows def-use chains, making them shorter at each step, thus
combining everything that can be combined in O(n) time.
Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders
Reviewed By: aditya_nandakumar, paquette
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71448
2019-12-13 05:20:41 +08:00
|
|
|
StringRef MIRString = R"(
|
|
|
|
%vptr:_(p0) = COPY $x4
|
|
|
|
%v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1)
|
|
|
|
%v0:_(s8), %v1:_(s8) = G_UNMERGE_VALUES %v:_(<2 x s8>)
|
|
|
|
%v0_ext:_(s16) = G_ANYEXT %v0:_(s8)
|
|
|
|
$h4 = COPY %v0_ext:_(s16)
|
|
|
|
)";
|
|
|
|
setUp(MIRString.rtrim(' '));
|
|
|
|
if (!TM)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ALegalizerInfo LI(MF->getSubtarget());
|
|
|
|
|
|
|
|
// The events here unfold as follows:
|
|
|
|
// 1. First, the function is scanned pre-forming the worklist of artifacts:
|
|
|
|
//
|
|
|
|
// UNMERGE (1): pushed into the worklist first, will be processed last.
|
|
|
|
// |
|
|
|
|
// ANYEXT (2)
|
|
|
|
//
|
|
|
|
// 2. Second, the load is scalarized, and then its destination is widened,
|
|
|
|
// forming the following chain of legalization artifacts:
|
|
|
|
//
|
|
|
|
// TRUNC (4): created last, will be processed first.
|
|
|
|
// |
|
|
|
|
// BUILD_VECTOR (3)
|
|
|
|
// |
|
|
|
|
// UNMERGE (1): pushed into the worklist first, will be processed last.
|
|
|
|
// |
|
|
|
|
// ANYEXT (2)
|
|
|
|
//
|
|
|
|
// 3. Third, the artifacts are attempted to be combined in pairs, looking
|
|
|
|
// through the def-use chain from the roots towards the leafs, visiting the
|
|
|
|
// roots in order they happen to be in the worklist:
|
|
|
|
// (4) - (trunc): can not be combined;
|
|
|
|
// (3) - (build_vector (trunc)): can not be combined;
|
|
|
|
// (2) - (anyext (unmerge)): can not be combined;
|
|
|
|
// (1) - (unmerge (build_vector)): combined and eliminated;
|
|
|
|
//
|
|
|
|
// leaving the function in the following state:
|
|
|
|
//
|
|
|
|
// TRUNC (1): moved to non-artifact instructions worklist first.
|
|
|
|
// |
|
|
|
|
// ANYEXT (2): also moved to non-artifact instructions worklist.
|
|
|
|
//
|
|
|
|
// Every other instruction is successfully legalized in full.
|
|
|
|
// If combining (unmerge (build_vector)) does not re-insert every artifact
|
|
|
|
// that had its def-use chain modified (shortened) into the artifact
|
|
|
|
// worklist (here it's just ANYEXT), the process moves on onto the next
|
|
|
|
// outer loop iteration of the top-level legalization algorithm here, w/o
|
|
|
|
// performing all the artifact combines possible. Let's consider this
|
|
|
|
// scenario first:
|
|
|
|
// 4.A. Neither TRUNC, nor ANYEXT can be legalized in isolation, both of them
|
|
|
|
// get moved to the retry worklist, but no additional artifacts were
|
|
|
|
// created in the process, thus algorithm concludes no progress could be
|
|
|
|
// made, and fails.
|
|
|
|
// 4.B. If, however, combining (unmerge (build_vector)) had re-inserted
|
|
|
|
// ANYEXT into the worklist (as ANYEXT's source changes, not by value,
|
|
|
|
// but by implementation), (anyext (trunc)) combine happens next, which
|
|
|
|
// fully eliminates all the artifacts and legalization succeeds.
|
|
|
|
//
|
|
|
|
// We're looking into making sure that (4.B) happens here, not (4.A). Note
|
|
|
|
// that in that case the first scan through the artifacts worklist, while not
|
|
|
|
// being done in any guaranteed order, only needs to find the innermost
|
|
|
|
// pair(s) of artifacts that could be immediately combined out. After that
|
|
|
|
// the process follows def-use chains, making them shorter at each step, thus
|
|
|
|
// combining everything that can be combined in O(n) time.
|
|
|
|
Legalizer::MFResult Result =
|
|
|
|
Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
|
|
|
|
|
|
|
|
EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
|
|
|
|
EXPECT_TRUE(Result.Changed);
|
|
|
|
|
|
|
|
StringRef CheckString = R"(
|
|
|
|
CHECK: %vptr:_(p0) = COPY $x4
|
|
|
|
CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1)
|
|
|
|
CHECK: %v0_ext:_(s16) = COPY [[LOAD_0]]:_(s16)
|
|
|
|
CHECK-NEXT: $h4 = COPY %v0_ext:_(s16)
|
|
|
|
)";
|
|
|
|
|
|
|
|
EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
|
|
|
|
}
|
|
|
|
|
2020-03-23 06:12:25 +08:00
|
|
|
// Same scenario as the unordered artifact combining test, but with several
// COPYs placed between the load, the unmerges and the extensions. Checks
// that legalization still succeeds and that the expected output contains
// none of those intermediate COPYs.
TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningManyCopiesTest) {
  StringRef InputMIR = R"(
    %vptr:_(p0) = COPY $x4
    %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1)
    %vc0:_(<2 x s8>) = COPY %v:_(<2 x s8>)
    %vc1:_(<2 x s8>) = COPY %v:_(<2 x s8>)
    %vc00:_(s8), %vc01:_(s8) = G_UNMERGE_VALUES %vc0:_(<2 x s8>)
    %vc10:_(s8), %vc11:_(s8) = G_UNMERGE_VALUES %vc1:_(<2 x s8>)
    %v0t:_(s8) = COPY %vc00:_(s8)
    %v0:_(s8) = COPY %v0t:_(s8)
    %v1t:_(s8) = COPY %vc11:_(s8)
    %v1:_(s8) = COPY %v1t:_(s8)
    %v0_zext:_(s32) = G_ZEXT %v0:_(s8)
    %v1_sext:_(s32) = G_SEXT %v1:_(s8)
    $w4 = COPY %v0_zext:_(s32)
    $w5 = COPY %v1_sext:_(s32)
  )";
  // Drop the spaces that precede the closing delimiter of the raw string.
  setUp(InputMIR.rtrim(' '));
  // Nothing to test when the fixture did not produce a target machine.
  if (!TM)
    return;

  ALegalizerInfo Rules(MF->getSubtarget());

  const Legalizer::MFResult Result =
      Legalizer::legalizeMachineFunction(*MF, Rules, {}, B);

  // No instruction may be left behind as impossible to legalize, and the
  // function must actually have been modified.
  EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
  EXPECT_TRUE(Result.Changed);

  // Expected shape: two s16 loads, a zero-extension expressed as
  // G_ANYEXT + G_AND with 255, and a sign-extension expressed as
  // G_ANYEXT + G_SHL/G_ASHR by 24.
  StringRef CheckString = R"(
    CHECK: %vptr:_(p0) = COPY $x4
    CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1)
    CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64)
    CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load 1 + 1)
    CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    CHECK-NEXT: [[V0_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_0]]:_(s16)
    CHECK-NEXT: %v0_zext:_(s32) = G_AND [[V0_EXT]]:_, [[FF_MASK]]:_
    CHECK-NEXT: [[V1_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_1]]:_(s16)
    CHECK-NEXT: [[SHAMNT:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
    CHECK-NEXT: [[V1_SHL:%[0-9]+]]:_(s32) = G_SHL [[V1_EXT]]:_, [[SHAMNT]]:_(s32)
    CHECK-NEXT: %v1_sext:_(s32) = G_ASHR [[V1_SHL]]:_, [[SHAMNT]]:_(s32)
    CHECK-NEXT: $w4 = COPY %v0_zext:_(s32)
    CHECK-NEXT: $w5 = COPY %v1_sext:_(s32)
  )";

  // Dump the whole function on mismatch to make the failure diagnosable.
  EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
}
|
|
|
|
|
2019-12-13 00:35:16 +08:00
|
|
|
} // namespace
|