[Hexagon] v67+ HVX register pairs should support either direction

Assembler now permits pairs like 'v0:1', which are encoded
differently from the odd-first pairs like 'v1:0'.

The compiler will require more work to leverage these new register
pairs.
This commit is contained in:
Brian Cain 2018-08-14 16:17:46 -05:00 committed by Brian Cain
parent 705306526b
commit bf3b86bc2f
16 changed files with 407 additions and 98 deletions

View File

@ -1294,9 +1294,28 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
SMLoc IDLoc) {
MCContext &Context = getParser().getContext();
const MCRegisterInfo *RI = getContext().getRegisterInfo();
std::string r = "r";
std::string v = "v";
std::string Colon = ":";
const std::string r = "r";
const std::string v = "v";
const std::string Colon = ":";
using RegPairVals = std::pair<unsigned, unsigned>;
auto GetRegPair = [this, r](RegPairVals RegPair) {
const std::string R1 = r + utostr(RegPair.first);
const std::string R2 = r + utostr(RegPair.second);
return std::make_pair(matchRegister(R1), matchRegister(R2));
};
auto GetScalarRegs = [RI, GetRegPair](unsigned RegPair) {
const unsigned Lower = RI->getEncodingValue(RegPair);
const RegPairVals RegPair_ = std::make_pair(Lower + 1, Lower);
return GetRegPair(RegPair_);
};
auto GetVecRegs = [GetRegPair](unsigned VecRegPair) {
const RegPairVals RegPair =
HexagonMCInstrInfo::GetVecRegPairIndices(VecRegPair);
return GetRegPair(RegPair);
};
bool is32bit = false; // used to distinguish between CONST32 and CONST64
switch (Inst.getOpcode()) {
@ -1388,14 +1407,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
case Hexagon::A2_tfrp: {
MCOperand &MO = Inst.getOperand(1);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
MO.setReg(RegPair.first);
Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode(Hexagon::A2_combinew);
break;
}
@ -1403,14 +1417,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrpt:
case Hexagon::A2_tfrpf: {
MCOperand &MO = Inst.getOperand(2);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
MO.setReg(RegPair.first);
Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
? Hexagon::C2_ccombinewt
: Hexagon::C2_ccombinewf);
@ -1419,14 +1428,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrptnew:
case Hexagon::A2_tfrpfnew: {
MCOperand &MO = Inst.getOperand(2);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
MO.setReg(RegPair.first);
Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
? Hexagon::C2_ccombinewnewt
: Hexagon::C2_ccombinewnewf);
@ -1436,12 +1440,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)"
case Hexagon::V6_vassignp: {
MCOperand &MO = Inst.getOperand(1);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
std::string R1 = v + utostr(RegPairNum + 1);
MO.setReg(MatchRegisterName(R1));
// Add a new operand for the second register in the pair.
std::string R2 = v + utostr(RegPairNum);
Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2)));
const std::pair<unsigned, unsigned> RegPair = GetVecRegs(MO.getReg());
MO.setReg(RegPair.first);
Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode(Hexagon::V6_vcombine);
break;
}

View File

@ -498,9 +498,13 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
} else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
unsigned Producer =
HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
else if (SubregBit)
if (HexagonMCInstrInfo::IsVecRegPair(Producer)) {
const bool Rev = HexagonMCInstrInfo::IsReverseVecRegPair(Producer);
const unsigned ProdPairIndex =
Rev ? Producer - Hexagon::WR0 : Producer - Hexagon::W0;
Producer = (ProdPairIndex << 1) + SubregBit + Hexagon::V0;
} else if (SubregBit)
// Hexagon PRM 10.11 New-value operands
// Nt[0] is reserved and should always be encoded as zero.
return MCDisassembler::Fail;
@ -606,12 +610,16 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
static const MCPhysReg HvxWRDecoderTable[] = {
Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3,
Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7,
Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11,
Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
Hexagon::W0, Hexagon::WR0, Hexagon::W1, Hexagon::WR1, Hexagon::W2,
Hexagon::WR2, Hexagon::W3, Hexagon::WR3, Hexagon::W4, Hexagon::WR4,
Hexagon::W5, Hexagon::WR5, Hexagon::W6, Hexagon::WR6, Hexagon::W7,
Hexagon::WR7, Hexagon::W8, Hexagon::WR8, Hexagon::W9, Hexagon::WR9,
Hexagon::W10, Hexagon::WR10, Hexagon::W11, Hexagon::WR11, Hexagon::W12,
Hexagon::WR12, Hexagon::W13, Hexagon::WR13, Hexagon::W14, Hexagon::WR14,
Hexagon::W15, Hexagon::WR15,
};
return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
}
LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.

View File

@ -172,6 +172,13 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::C8);
Reserved.set(Hexagon::USR_OVF);
// Leveraging these registers will require more work to recognize
// the new semantics posed, Hi/LoVec patterns, etc.
// Note well: if enabled, they should be restricted to only
// where `HST.useHVXOps() && HST.hasV67Ops()` is true.
for (auto Reg : Hexagon_MC::GetVectRegRev())
Reserved.set(Reg);
if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19())
Reserved.set(Hexagon::R19);

View File

@ -18,6 +18,12 @@ let Namespace = "Hexagon" in {
let HWEncoding{4-0} = num;
}
// These registers are used to preserve a distinction between
// vector register pairs of differing order.
class HexagonFakeReg<string n> : Register<n> {
let isArtificial = 1;
}
class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
list<string> alt = []> :
RegisterWithSubRegs<n, subregs> {
@ -30,6 +36,13 @@ let Namespace = "Hexagon" in {
class Ri<bits<5> num, string n, list<string> alt = []> :
HexagonReg<num, n, alt>;
// R_fake - false/pseudo registers. These registers are used
// to provide a distinct set of aliases for both styles of vector
// register pairs without encountering subregister indexing constraints.
class R_fake<string n> :
HexagonFakeReg<n>;
// Rf - 32-bit floating-point registers.
class Rf<bits<5> num, string n> : HexagonReg<num, n>;
@ -81,6 +94,7 @@ let Namespace = "Hexagon" in {
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
def vsub_hi : SubRegIndex<512, 512>;
def vsub_fake: SubRegIndex<512>;
def wsub_lo : SubRegIndex<1024>;
def wsub_hi : SubRegIndex<1024, 1024>;
def subreg_overflow : SubRegIndex<1, 0>;
@ -183,27 +197,49 @@ let Namespace = "Hexagon" in {
foreach i = 0-31 in {
def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
def VF#i : R_fake<"__"#!add(i,999999)>, DwarfRegNum<[!add(i, 999999)]>;
def VFR#i : R_fake<"__"#!add(i,9999999)>, DwarfRegNum<[!add(i, 9999999)]>;
}
def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>;
// Aliases of the V* registers used to hold double vec values.
let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in {
def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>;
def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>;
def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
def W0 : Rd< 0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>;
def W1 : Rd< 2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>;
def W2 : Rd< 4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>;
def W3 : Rd< 6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>;
def W4 : Rd< 8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>;
def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>;
def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>;
def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>;
def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>;
def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>;
def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>;
def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>;
def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>;
def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>;
def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>;
def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>;
}
// Reverse Aliases of the V* registers used to hold double vec values.
let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
def WR0 : Rd< 1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>;
def WR1 : Rd< 3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>;
def WR2 : Rd< 5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>;
def WR3 : Rd< 7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>;
def WR4 : Rd< 9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>;
def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>;
def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>;
def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>;
def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>;
def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>;
def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>;
def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>;
def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>;
def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>;
def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>;
def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>;
}
// Aliases of the V* registers used to hold quad vec values.
@ -314,7 +350,7 @@ def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
}
def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
(add (sequence "W%u", 0, 15))> {
(add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
}
@ -365,6 +401,10 @@ def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
M0, M1, USR)>;
let Size = 64 in
def VectRegRev : RegisterClass<"Hexagon", [i64], 64,
(add (sequence "WR%u", 0, 15))>;
let isAllocatable = 0 in
def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;

View File

@ -71,9 +71,10 @@ public:
char HexagonVectorPrint::ID = 0;
static bool isVecReg(unsigned Reg) {
return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31)
|| (Reg >= Hexagon::W0 && Reg <= Hexagon::W15)
|| (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) ||
(Reg >= Hexagon::W0 && Reg <= Hexagon::W15) ||
(Reg >= Hexagon::WR0 && Reg <= Hexagon::WR15) ||
(Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
}
static std::string getStringReg(unsigned R) {

View File

@ -81,6 +81,9 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
if (!MCSubRegIterator(*SRI, &RI).isValid())
// Skip super-registers used indirectly.
Uses.insert(*SRI);
if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
ReversePairs.insert(R);
}
void HexagonMCChecker::init(MCInst const &MCI) {
@ -133,6 +136,9 @@ void HexagonMCChecker::init(MCInst const &MCI) {
if (R == Hexagon::C8)
R = Hexagon::USR;
if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
ReversePairs.insert(R);
// Note register definitions, direct ones as well as indirect side-effects.
// Super-registers are not tracked directly, but their components.
for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
@ -192,7 +198,7 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII,
MCSubtargetInfo const &STI, MCInst &mcb,
MCRegisterInfo const &ri, bool ReportErrors)
: Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI),
ReportErrors(ReportErrors) {
ReportErrors(ReportErrors), ReversePairs() {
init();
}
@ -200,7 +206,10 @@ HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other,
MCSubtargetInfo const &STI,
bool CopyReportErrors)
: Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII),
STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {}
STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false),
ReversePairs() {
init();
}
bool HexagonMCChecker::check(bool FullCheck) {
bool chkP = checkPredicates();
@ -218,8 +227,9 @@ bool HexagonMCChecker::check(bool FullCheck) {
bool chkAXOK = checkAXOK();
bool chkCofMax1 = checkCOFMax1();
bool chkHWLoop = checkHWLoop();
bool chkLegalVecRegPair = checkLegalVecRegPair();
bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl &&
chkAXOK && chkCofMax1 && chkHWLoop;
chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair;
return chk;
}
@ -729,3 +739,16 @@ void HexagonMCChecker::reportWarning(Twine const &Msg) {
if (ReportErrors)
Context.reportWarning(MCB.getLoc(), Msg);
}
bool HexagonMCChecker::checkLegalVecRegPair() {
const bool IsPermitted = STI.getFeatureBits()[Hexagon::ArchV67];
const bool HasReversePairs = ReversePairs.size() != 0;
if (!IsPermitted && HasReversePairs) {
for (auto R : ReversePairs)
reportError("register pair `" + Twine(RI.getName(R)) +
"' is not permitted for this architecture");
return false;
}
return true;
}

View File

@ -72,6 +72,10 @@ class HexagonMCChecker {
using ReadOnlyIterator = std::set<unsigned>::iterator;
std::set<unsigned> ReadOnly;
// Contains the vector-pair-registers with the even number
// first ("v0:1", e.g.) used/def'd in this packet.
std::set<unsigned> ReversePairs;
void init();
void init(MCInst const &);
void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
@ -94,6 +98,7 @@ class HexagonMCChecker {
bool checkAXOK();
bool checkHWLoop();
bool checkCOFMax1();
bool checkLegalVecRegPair();
static void compoundRegisterMap(unsigned &);

View File

@ -391,15 +391,9 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
static bool RegisterMatches(unsigned Consumer, unsigned Producer,
unsigned Producer2) {
if (Consumer == Producer)
return true;
if (Consumer == Producer2)
return true;
// Calculate if we're a single vector consumer referencing a double producer
if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
return ((Consumer - Hexagon::V0) >> 1) == (Producer - Hexagon::W0);
return false;
return (Consumer == Producer) || (Consumer == Producer2) ||
HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(Producer,
Consumer);
}
/// EncodeSingleInstruction - Emit a single
@ -735,7 +729,8 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
unsigned SOffset = 0;
unsigned VOffset = 0;
unsigned UseReg = MO.getReg();
unsigned DefReg1, DefReg2;
unsigned DefReg1 = Hexagon::NoRegister;
unsigned DefReg2 = Hexagon::NoRegister;
auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle);
const MCOperand *I = Instrs.begin() + State.Index - 1;
@ -746,7 +741,8 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
if (HexagonMCInstrInfo::isImmext(Inst))
continue;
DefReg1 = DefReg2 = 0;
DefReg1 = Hexagon::NoRegister;
DefReg2 = Hexagon::NoRegister;
++SOffset;
if (HexagonMCInstrInfo::isVector(MCII, Inst)) {
// Vector instructions don't count scalars.

View File

@ -676,6 +676,45 @@ bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
return (Flags & outerLoopMask) != 0;
}
/// Returns true if \p VecReg lies in the W0..W15 range or in the WR0..WR15
/// range of vector register pairs.
bool HexagonMCInstrInfo::IsVecRegPair(unsigned VecReg) {
  const bool InWRange = VecReg >= Hexagon::W0 && VecReg <= Hexagon::W15;
  const bool InWRRange = VecReg >= Hexagon::WR0 && VecReg <= Hexagon::WR15;
  return InWRange || InWRRange;
}
/// Returns true if \p VecReg lies in the WR0..WR15 range, i.e. it is one of
/// the reverse vector register pairs.
bool HexagonMCInstrInfo::IsReverseVecRegPair(unsigned VecReg) {
  if (VecReg < Hexagon::WR0)
    return false;
  return VecReg <= Hexagon::WR15;
}
/// Returns true if \p VecReg lies in the V0..V31 range of single vector
/// registers.
bool HexagonMCInstrInfo::IsVecRegSingle(unsigned VecReg) {
  const bool BelowRange = VecReg < Hexagon::V0;
  const bool AboveRange = VecReg > Hexagon::V31;
  return !(BelowRange || AboveRange);
}
/// Returns the ordinals of the two single vector registers that make up
/// \p VecRegPair, in the order they appear in the pair.
///
/// For a reverse pair (WR range) the result is { 2*i, 2*i + 1 }; for a pair
/// in the W range it is { 2*i + 1, 2*i }, where i is the pair's offset from
/// the start of its range. Asserts that \p VecRegPair is a vector register
/// pair.
std::pair<unsigned, unsigned>
HexagonMCInstrInfo::GetVecRegPairIndices(unsigned VecRegPair) {
  assert(IsVecRegPair(VecRegPair) &&
         "VecRegPair must be a vector register pair");

  if (IsReverseVecRegPair(VecRegPair)) {
    const unsigned FirstIndex = 2 * (VecRegPair - Hexagon::WR0);
    return std::make_pair(FirstIndex, FirstIndex + 1);
  }

  const unsigned SecondIndex = 2 * (VecRegPair - Hexagon::W0);
  return std::make_pair(SecondIndex + 1, SecondIndex);
}
/// Returns true if \p Consumer is a single vector register whose ordinal,
/// halved, equals the offset of the vector register pair \p Producer within
/// its range (W or WR). Returns false if \p Producer is not a vector register
/// pair or \p Consumer is not a single vector register.
bool HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(unsigned Producer,
                                                         unsigned Consumer) {
  if (!IsVecRegPair(Producer) || !IsVecRegSingle(Consumer))
    return false;

  // Offset of the producer pair from the first register of its range.
  const unsigned RangeStart =
      IsReverseVecRegPair(Producer) ? Hexagon::WR0 : Hexagon::W0;
  const unsigned PairOffset = Producer - RangeStart;

  // Two consecutive single registers map to the same pair offset.
  const unsigned ConsumerPairOffset = (Consumer - Hexagon::V0) >> 1;
  return PairOffset == ConsumerPairOffset;
}
bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@ -971,9 +1010,8 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer,
unsigned Producer2) {
// If we're a single vector consumer of a double producer, set subreg bit
// based on if we're accessing the lower or upper register component
if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
return (Consumer - Hexagon::V0) & 0x1;
if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer))
return (Consumer - Hexagon::V0) & 0x1;
if (Producer2 != Hexagon::NoRegister)
return Consumer == Producer;
return 0;

View File

@ -351,6 +351,16 @@ bool subInstWouldBeExtended(MCInst const &potentialDuplex);
unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
unsigned Producer2);
bool IsVecRegSingle(unsigned VecReg);
bool IsVecRegPair(unsigned VecReg);
bool IsReverseVecRegPair(unsigned VecReg);
bool IsSingleConsumerRefPairProducer(unsigned Producer, unsigned Consumer);
/// Returns an ordered pair of the constituent register ordinals for
/// each of the elements of \a VecRegPair. For example, Hexagon::WR0 ("v0:1")
/// returns { 0, 1 } and Hexagon::W1 ("v3:2") returns { 3, 2 }.
std::pair<unsigned, unsigned> GetVecRegPairIndices(unsigned VecRegPair);
// Attempt to find and replace compound pairs
void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCI);

View File

@ -532,6 +532,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
return F->second;
}
/// Returns a view over VectRegRev.
// NOTE(review): VectRegRev is presumably the generated member list of the
// VectRegRev register class (WR0..WR15) - confirm against the generated
// register info.
llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {
  return VectRegRev;
}
namespace {
class HexagonMCInstrAnalysis : public MCInstrAnalysis {
public:

View File

@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include <cstdint>
#include <string>
@ -82,6 +83,8 @@ namespace Hexagon_MC {
void addArchSubtarget(MCSubtargetInfo const *STI,
StringRef FS);
unsigned GetELFFlags(const MCSubtargetInfo &STI);
llvm::ArrayRef<MCPhysReg> GetVectRegRev();
}
MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,

View File

@ -2,28 +2,11 @@
; We do not pipeline sigma yet, but the non-pipelined version
; with good scheduling is pretty fast. The compiler generates
; 19 packets, and the assembly version is 16.
; 18 packets, and the assembly version is 16.
; CHECK: loop0(.LBB0_[[LOOP:.]],
; CHECK: .LBB0_[[LOOP]]:
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK-COUNT-17: }
; CHECK: }{{[ \t]*}}:endloop
@g0 = external constant [10 x i16], align 128

View File

@ -0,0 +1,134 @@
;RUN: llc -march=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv66 -mhvx -d - | FileCheck --check-prefix=CHECK-V66 %s
;RUN: llc -march=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv67 -mhvx -d - | FileCheck --check-prefix=CHECK-V67 %s
; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs
; on old or new arches (should not crash).
; CHECK-V66: vcombine
; CHECK-V67: vcombine
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vd0()
declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>)
define void @Gaussian7x7u8PerRow(i8* %src, i32 %stride, i32 %width, i8* %dst) #0 {
entry:
%mul = mul i32 %stride, 3
%idx.neg = sub i32 0, %mul
%add.ptr = getelementptr i8, i8* %src, i32 %idx.neg
bitcast i8* %add.ptr to <16 x i32>*
%mul1 = shl i32 %stride, 1
%idx.neg2 = sub i32 0, %mul1
%add.ptr3 = getelementptr i8, i8* %src, i32 %idx.neg2
bitcast i8* %add.ptr3 to <16 x i32>*
%idx.neg5 = sub i32 0, %stride
%add.ptr6 = getelementptr i8, i8* %src, i32 %idx.neg5
bitcast i8* %add.ptr6 to <16 x i32>*
bitcast i8* %src to <16 x i32>*
%add.ptr10 = getelementptr i8, i8* %src, i32 %stride
bitcast i8* %add.ptr10 to <16 x i32>*
%add.ptr12 = getelementptr i8, i8* %src, i32 %mul1
bitcast i8* %add.ptr12 to <16 x i32>*
%add.ptr14 = getelementptr i8, i8* %src, i32 %mul
bitcast i8* %add.ptr14 to <16 x i32>*
bitcast i8* %dst to <16 x i32>*
load <16 x i32>, <16 x i32>* %0load <16 x i32>, <16 x i32>* %1load <16 x i32>, <16 x i32>* %2load <16 x i32>, <16 x i32>* %3load <16 x i32>, <16 x i32>* %4load <16 x i32>, <16 x i32>* %5load <16 x i32>, <16 x i32>* %6call <16 x i32> @llvm.hexagon.V6.vd0()
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15)
call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9)
call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10)
call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135)
call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180)
%cmp155 = icmp sgt i32 %width, 64
br i1 %cmp155, label %for.body.preheader, label %for.end
for.body.preheader: %incdec.ptr20 = getelementptr i8, i8* %add.ptr14%23 = bitcast i8* %incdec.ptr20 to <16 x i32>*
%incdec.ptr19 = getelementptr i8, i8* %add.ptr12%24 = bitcast i8* %incdec.ptr19 to <16 x i32>*
%incdec.ptr18 = getelementptr i8, i8* %add.ptr10%25 = bitcast i8* %incdec.ptr18 to <16 x i32>*
%incdec.ptr17 = getelementptr i8, i8* %src%26 = bitcast i8* %incdec.ptr17 to <16 x i32>*
%incdec.ptr16 = getelementptr i8, i8* %add.ptr6%27 = bitcast i8* %incdec.ptr16 to <16 x i32>*
%incdec.ptr15 = getelementptr i8, i8* %add.ptr3%28 = bitcast i8* %incdec.ptr15 to <16 x i32>*
%incdec.ptr = getelementptr i8, i8* %add.ptr%29 = bitcast i8* %incdec.ptr to <16 x i32>*
br label %for.body
for.body: %optr.0166 = phi <16 x i32>* [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ]
%iptr6.0165 = phi <16 x i32>* [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ]
%iptr5.0164 = phi <16 x i32>* [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ]
%iptr4.0163 = phi <16 x i32>* [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ]
%iptr3.0162 = phi <16 x i32>* [ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ]
%iptr2.0161 = phi <16 x i32>* [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ]
%iptr1.0160 = phi <16 x i32>* [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ]
%iptr0.0159 = phi <16 x i32>* [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ]
%dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ]
%dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ]
%i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ]
%incdec.ptr21 = getelementptr <16 x i32>, <16 x i32>* %iptr0.0159%30 = load <16 x i32>, <16 x i32>* %iptr0.0159%incdec.ptr22 = getelementptr <16 x i32>, <16 x i32>* %iptr1.0160%31 = load <16 x i32>, <16 x i32>* %iptr1.0160%incdec.ptr23 = getelementptr <16 x i32>, <16 x i32>* %iptr2.0161%32 = load <16 x i32>, <16 x i32>* %iptr2.0161%incdec.ptr24 = getelementptr <16 x i32>, <16 x i32>* %iptr3.0162%33 = load <16 x i32>, <16 x i32>* %iptr3.0162%incdec.ptr25 = getelementptr <16 x i32>, <16 x i32>* %iptr4.0163%34 = load <16 x i32>, <16 x i32>* %iptr4.0163%incdec.ptr26 = getelementptr <16 x i32>, <16 x i32>* %iptr5.0164%35 = load <16 x i32>, <16 x i32>* %iptr5.0164%incdec.ptr27 = getelementptr <16 x i32>, <16 x i32>* %iptr6.0165%36 = load <16 x i32>, <16 x i32>* %iptr6.0165, !tbaa !8
call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158)
call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157)
call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2)
call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158)
call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157)
call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2)
call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4)
call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x i32> %31)
call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32)
call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135)
call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180)
call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49)
call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2)
call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49)
call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2)
call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40)
call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37)
call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62)
call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42)
call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39)
call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66)
call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694)
call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64)
call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64)
call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12)
call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %68)
call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68)
call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12)
call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71)
%incdec.ptr28 = getelementptr <16 x i32>, <16 x i32>* %1
store <16 x i32> %75, <16 x i32>* %optr.0166%sub = add i32 %i.0156, -64
%cmp = icmp sgt i32 %sub, 64
br i1 %cmp, label %for.body, label %for.end
for.end: ret void
}
declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>)
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math""target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math"}
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10}
!10 = !{}
!14 = !{}
!19 = !{}
!24 = !{}

View File

@ -0,0 +1,15 @@
# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv67 -mhvx -filetype=asm %s 2>%t; FileCheck --implicit-check-not="error:" %s <%t
{
v1:0 = #0
v0:1 = #0
}
# CHECK: error: register `V1' modified more than once
## Unused .tmp:
{
v1.tmp = vmem(r0 + #3)
v0:1 = vaddw(v17:16, v17:16)
}
# CHECK: warning: register `V1' used with `.tmp' but not used in the same packet

View File

@ -0,0 +1,43 @@
# RUN: llvm-mc -filetype=obj -arch=hexagon -mcpu=hexagonv67 -mhvx %s | llvm-objdump -d -mcpu=hexagonv67 -mhvx - | FileCheck %s
# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv65 -mhvx -filetype=asm %s 2>%t; FileCheck --check-prefix=CHECK-V65 --implicit-check-not="error:" %s <%t
v1:0.w = vadd(v0.h, v1.h) // Normal
# CHECK: 1ca1c080
v0:1.w = vadd(v0.h, v1.h) // Swapped
# CHECK-NEXT: 1ca1c081
# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
## Swapped use:
v1:0.w = vtmpy(v0:1.h,r0.b)
# CHECK-NEXT: 19a0c180
# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
## Swapped def
v0:1 = v3:2
# CHECK-NEXT: 1f42c3e1 { v0:1 = vcombine(v3,v2) }
# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
# Mapped instruction's swapped use:
v1:0 = v2:3
# CHECK-NEXT: v1:0 = vcombine(v2,v3)
## No error for v65, this is now permitted!
## .new producer from pair:
{
v0:1 = vaddw(v0:1, v0:1)
if (!p0) vmem(r0+#0)=v0.new
}
# CHECK-NEXT: v0:1.w = vadd(v0:1.w,v0:1.w)
# CHECK-NEXT: if (!p0) vmem(r0+#0) = v0.new
# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
## Used .tmp, swapped use & def:
{
v0.tmp = vmem(r0 + #3)
v2:3 = vaddw(v0:1, v0:1)
}
# CHECK-NEXT: 1c6141c3 { v2:3.w = vadd(v0:1.w,v0:1.w)
# CHECK-NEXT: v0.tmp = vmem(r0+#3) }
# CHECK-V65: error: register pair `WR0' is not permitted for this architecture
# CHECK-V65: error: register pair `WR1' is not permitted for this architecture