2019-09-05 05:29:10 +08:00
|
|
|
//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "MIRVRegNamerUtils.h"
|
2020-04-20 21:33:21 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
2019-10-19 08:22:07 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2019-09-05 05:29:10 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "mir-vregnamer-utils"
|
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
// Maps a register number to the register number that replaces it;
// doVRegRenaming() passes each (first, second) pair to MRI.replaceRegWith().
using VRegRenameMap = std::map<unsigned, unsigned>;
|
2019-09-05 05:29:10 +08:00
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
|
|
|
|
bool Changed = false;
|
2019-09-05 05:29:10 +08:00
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
for (const auto &E : VRM) {
|
|
|
|
Changed = Changed || !MRI.reg_empty(E.first);
|
|
|
|
MRI.replaceRegWith(E.first, E.second);
|
2019-09-05 05:29:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
VRegRenameMap
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
|
2019-12-10 03:54:09 +08:00
|
|
|
|
|
|
|
StringMap<unsigned> VRegNameCollisionMap;
|
|
|
|
|
|
|
|
auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
|
|
|
|
if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end())
|
|
|
|
VRegNameCollisionMap[Reg.getName()] = 0;
|
|
|
|
const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
return Reg.getName() + "__" + std::to_string(Counter);
|
|
|
|
};
|
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
VRegRenameMap VRM;
|
|
|
|
for (const auto &VReg : VRegs) {
|
|
|
|
const unsigned Reg = VReg.getReg();
|
|
|
|
VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg));
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
}
|
2019-12-10 03:54:09 +08:00
|
|
|
return VRM;
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
}
|
2019-09-05 05:29:10 +08:00
|
|
|
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off-by-a-few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, whereas for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration), no need to
keep track of what's visited and what's not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
|
|
|
|
std::string S;
|
|
|
|
raw_string_ostream OS(S);
|
2019-12-10 03:54:09 +08:00
|
|
|
|
|
|
|
// Gets a hashable artifact from a given MachineOperand (ie an unsigned).
|
|
|
|
auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
|
2019-12-14 13:58:44 +08:00
|
|
|
switch (MO.getType()) {
|
2019-12-17 02:23:03 +08:00
|
|
|
case MachineOperand::MO_CImmediate:
|
|
|
|
return hash_combine(MO.getType(), MO.getTargetFlags(),
|
|
|
|
MO.getCImm()->getZExtValue());
|
|
|
|
case MachineOperand::MO_FPImmediate:
|
|
|
|
return hash_combine(
|
|
|
|
MO.getType(), MO.getTargetFlags(),
|
|
|
|
MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
|
2019-12-14 13:58:44 +08:00
|
|
|
case MachineOperand::MO_Register:
|
|
|
|
if (Register::isVirtualRegister(MO.getReg()))
|
|
|
|
return MRI.getVRegDef(MO.getReg())->getOpcode();
|
2019-12-10 03:54:09 +08:00
|
|
|
return MO.getReg();
|
2019-12-17 07:49:03 +08:00
|
|
|
case MachineOperand::MO_Immediate:
|
|
|
|
return MO.getImm();
|
|
|
|
case MachineOperand::MO_TargetIndex:
|
|
|
|
return MO.getOffset() | (MO.getTargetFlags() << 16);
|
2020-01-14 02:30:20 +08:00
|
|
|
case MachineOperand::MO_FrameIndex:
|
2020-03-10 12:31:01 +08:00
|
|
|
case MachineOperand::MO_ConstantPoolIndex:
|
2020-04-12 13:09:08 +08:00
|
|
|
case MachineOperand::MO_JumpTableIndex:
|
2020-01-14 02:30:20 +08:00
|
|
|
return llvm::hash_value(MO);
|
2019-12-14 13:58:44 +08:00
|
|
|
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
// We could explicitly handle all the types of the MachineOperand,
|
|
|
|
// here but we can just return a common number until we find a
|
|
|
|
// compelling test case where this is bad. The only side effect here
|
2019-12-10 03:54:09 +08:00
|
|
|
// is contributing to a hash collision but there's enough information
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
// (Opcodes,other registers etc) that this will likely not be a problem.
|
2019-12-14 13:58:44 +08:00
|
|
|
|
2019-12-17 07:49:03 +08:00
|
|
|
// TODO: Handle the following Index/ID/Predicate cases. They can
|
2019-12-14 13:58:44 +08:00
|
|
|
// be hashed on in a stable manner.
|
|
|
|
case MachineOperand::MO_CFIIndex:
|
|
|
|
case MachineOperand::MO_IntrinsicID:
|
|
|
|
case MachineOperand::MO_Predicate:
|
|
|
|
|
|
|
|
// In the cases below we havn't found a way to produce an artifact that will
|
|
|
|
// result in a stable hash, in most cases because they are pointers. We want
|
|
|
|
// stable hashes because we want the hash to be the same run to run.
|
|
|
|
case MachineOperand::MO_MachineBasicBlock:
|
|
|
|
case MachineOperand::MO_ExternalSymbol:
|
|
|
|
case MachineOperand::MO_GlobalAddress:
|
|
|
|
case MachineOperand::MO_BlockAddress:
|
|
|
|
case MachineOperand::MO_RegisterMask:
|
|
|
|
case MachineOperand::MO_RegisterLiveOut:
|
|
|
|
case MachineOperand::MO_Metadata:
|
|
|
|
case MachineOperand::MO_MCSymbol:
|
|
|
|
case MachineOperand::MO_ShuffleMask:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
llvm_unreachable("Unexpected MachineOperandType.");
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
};
|
2019-12-10 03:54:09 +08:00
|
|
|
|
2019-12-10 17:40:36 +08:00
|
|
|
SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()};
|
2019-12-10 03:54:09 +08:00
|
|
|
llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO);
|
|
|
|
|
2019-12-11 14:39:17 +08:00
|
|
|
for (const auto *Op : MI.memoperands()) {
|
|
|
|
MIOperands.push_back((unsigned)Op->getSize());
|
|
|
|
MIOperands.push_back((unsigned)Op->getFlags());
|
|
|
|
MIOperands.push_back((unsigned)Op->getOffset());
|
|
|
|
MIOperands.push_back((unsigned)Op->getOrdering());
|
|
|
|
MIOperands.push_back((unsigned)Op->getAddrSpace());
|
|
|
|
MIOperands.push_back((unsigned)Op->getSyncScopeID());
|
[Alignment][NFC] MachineMemOperand::getAlign/getBaseAlign
Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Subscribers: arsenm, dschuff, sdardis, nemanjai, jvesely, nhaehnle, sbc100, jgravelle-google, hiraditya, aheejin, kbarton, jrtc27, atanasyan, jfb, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76925
2020-03-27 21:51:59 +08:00
|
|
|
MIOperands.push_back((unsigned)Op->getBaseAlign().value());
|
2019-12-11 14:39:17 +08:00
|
|
|
MIOperands.push_back((unsigned)Op->getFailureOrdering());
|
|
|
|
}
|
|
|
|
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
|
|
|
|
return std::to_string(HashMI).substr(0, 5);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
|
2019-12-10 03:54:09 +08:00
|
|
|
assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers");
|
|
|
|
std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg));
|
|
|
|
return createVirtualRegisterWithLowerName(VReg, Name);
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off by few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, where as for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration) , no keep
track of what's visited and not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
}
|
2019-09-05 05:29:10 +08:00
|
|
|
|
[MirNamer][Canonicalizer]: Perform instruction semantic based renaming
https://reviews.llvm.org/D70210
Previously:
Due to sensitivity of the algorithm with gaps, and extra instructions,
when diffing, often we see naming being off by a few. Makes the diff
unreadable even for tests with 7 and 8 instructions respectively.
Naming can change depending on candidates (and order of picking
candidates). Suddenly if there's one extra instruction somewhere, the
entire subtree would be named completely differently.
No consistent naming of similar instructions which occur in different
functions. If we try to do something like count the frequency
distribution of various differences across suite, then the above
sensitivity issues are going to result in poor results.
Instead:
Name instruction based on semantics of the instruction (hash of the
opcode and operands). Essentially for a given instruction that occurs in
any module/function it'll be named similarly (ie semantic). This has
some nice properties
Can easily look at many instructions and just check the hash and if
they're named similarly, then it's the same instruction. Makes it very
easy to spot the same instruction both multiple times, as well as across
many functions (useful for frequency distribution).
Independent of traversal/candidates/depth of graph. No need to keep
track of last index/gaps/skip count etc.
No off-by-a-few issues with diffs. I've tried the old vs new
implementation in files ranging from 30 to 700 instructions. In both
cases with the old algorithm, diffs are a sea of red, whereas for the
semantic version, in both cases, the diffs line up beautifully.
Simplified implementation of the main loop (simple iteration), no need to
keep track of what's visited and what's not.
Handle collision just by incrementing a counter. Roughly
bb[N]_hash_[CollisionCount].
Additionally with the new implementation, we can probably avoid doing
the hoisting of instructions to various places, as they'll likely be
named the same resulting in differences only based on collision (ie
regardless of whether the instruction is hoisted or not/close to use or
not, it'll be named the same hash which should result in use of the
instruction be identical with the only change being the collision count)
which is very easy to spot visually.
2019-11-16 00:23:32 +08:00
|
|
|
/// Walk every instruction in \p MBB and gather the virtual registers found in
/// operand 0, pairing each with the name
/// "bb<CurrentBBNumber>_" + getInstructionOpcodeHash(instruction).
///
/// Instructions are skipped when they may store, are branches, have no
/// operands, or when operand 0 is not a register or not a virtual register.
///
/// \returns false when no register was gathered; otherwise the result of
/// doVRegRenaming() applied to the map built by getVRegRenameMap().
bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
  // Every name produced for this block shares the same block prefix.
  const std::string BlockPrefix =
      "bb" + std::to_string(CurrentBBNumber) + "_";

  std::vector<NamedVReg> Renamables;
  for (MachineInstr &MI : *MBB) {
    // Stores, branches and operand-less instructions are never renamed.
    if (MI.mayStore() || MI.isBranch() || MI.getNumOperands() == 0)
      continue;

    // Only operand 0 is inspected, and only if it is a virtual register
    // (non-register operands and physical registers are left alone).
    MachineOperand &FirstOp = MI.getOperand(0);
    const bool IsVirtualRegOperand =
        FirstOp.isReg() && Register::isVirtualRegister(FirstOp.getReg());
    if (!IsVirtualRegOperand)
      continue;

    Renamables.push_back(NamedVReg(
        FirstOp.getReg(), BlockPrefix + getInstructionOpcodeHash(MI)));
  }

  // Nothing gathered means nothing to rename.
  if (Renamables.empty())
    return false;
  return doVRegRenaming(getVRegRenameMap(Renamables));
}
|
|
|
|
|
2019-12-10 03:54:09 +08:00
|
|
|
unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg,
|
|
|
|
StringRef Name) {
|
|
|
|
std::string LowerName = Name.lower();
|
|
|
|
const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
|
|
|
|
return RC ? MRI.createVirtualRegister(RC, LowerName)
|
|
|
|
: MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName);
|
2019-09-05 05:29:10 +08:00
|
|
|
}
|