[Hexagon] Generate bitsplit instruction

llvm-svn: 297239
This commit is contained in:
Krzysztof Parzyszek 2017-03-07 23:08:35 +00:00
parent 05641ca556
commit 8e4d2e0512
3 changed files with 170 additions and 1 deletions

View File

@ -48,10 +48,15 @@ static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
cl::init(true), cl::desc("Preserve subregisters in tied operands"));
// Hidden switch "hexbit-extract": enables generation of extract
// instructions. Defaults to true.
static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
cl::init(true), cl::desc("Generate extract instructions"));
// Hidden switch "hexbit-bitsplit": enables generation of bitsplit
// instructions. Defaults to true. genBitSplit bails out immediately when
// this is false.
static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
cl::init(true), cl::desc("Generate bitsplit instructions"));
// Hidden limit "hexbit-max-extract", UINT_MAX by default.
// NOTE(review): presumably used together with CountExtract the same way
// MaxBitSplit/CountBitSplit are used in genBitSplit -- the code that reads
// them is not visible here.
static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
cl::init(UINT_MAX));
static unsigned CountExtract = 0;
// Hidden limit "hexbit-max-bitsplit", UINT_MAX by default. genBitSplit
// stops generating bitsplits once CountBitSplit reaches this value.
static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
cl::init(UINT_MAX));
// Number of bitsplit instructions generated so far; incremented by
// genBitSplit each time it creates one.
static unsigned CountBitSplit = 0;
namespace llvm {
@ -1769,6 +1774,8 @@ namespace {
const BitTracker::RegisterCell &RC);
bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
// Try to pair RD (with bit-tracker cell RC) with another register from AVs
// so that both can be produced by a single A4_bitspliti. Returns true if
// the instruction was generated and the registers were replaced.
bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
@ -2155,6 +2162,115 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
return false;
}
// Try to replace RD.Reg and one other register S (taken from AVs) with the
// two 32-bit halves of a single A4_bitspliti.
//
// The recognized pattern is a pair of 32-bit registers such that
//   - RC (the cell of RD) is W-Z consecutive bits of some source register
//     SrcR, followed by Z leading zeros, and
//   - the cell of S is Z consecutive bits of the same SrcR, followed by
//     W-Z leading zeros,
// where the two bitfields are adjacent in SrcR and the lower one starts at
// bit 0 or bit 32 of SrcR (a 32-bit subregister boundary).
//
// MI  - the instruction being examined (only its opcode is inspected).
// RD  - the register defined by MI.
// RC  - the bit-tracker cell describing RD.
// AVs - the registers considered as candidates for the other bitfield.
//
// Returns true if a bitsplit was created and the uses of RD.Reg and S were
// rewritten to its subregisters, false if nothing was changed.
bool BitSimplification::genBitSplit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenBitSplit)
    return false;
  if (CountBitSplit >= MaxBitSplit)
    return false;

  // Instructions that already are bitsplits are left alone.
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A4_bitsplit:
    case Hexagon::A4_bitspliti:
      return false;
  }

  unsigned W = RC.width();
  if (W != 32)
    return false;

  // Number of consecutive most-significant bits of C that are known zeros.
  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
    unsigned Z = C.width();
    while (Z > 0 && C[Z-1].is(0))
      --Z;
    return C.width() - Z;
  };

  // Count the number of leading zeros in the target RC.
  unsigned Z = ctlz(RC);
  if (Z == 0 || Z == W)
    return false;

  // A simplistic analysis: assume the source register (the one being split)
  // is fully unknown, and that all its bits are self-references.
  const BitTracker::BitValue &B0 = RC[0];
  if (B0.Type != BitTracker::BitValue::Ref)
    return false;

  unsigned SrcR = B0.RefI.Reg;
  unsigned SrcSR = 0;
  unsigned Pos = B0.RefI.Pos;

  // All the non-zero bits should be consecutive bits from the same register.
  for (unsigned i = 1; i < W-Z; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      return false;
    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
      return false;
  }

  // Now, find the other bitfield among AVs.
  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
    // The number of leading zeros here should be the number of trailing
    // non-zeros in RC.
    const BitTracker::RegisterCell &SC = BT.lookup(S);
    if (SC.width() != W || ctlz(SC) != W-Z)
      continue;
    // The Z lower bits should now match SrcR.
    const BitTracker::BitValue &S0 = SC[0];
    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
      continue;
    unsigned P = S0.RefI.Pos;

    // The two bitfields must be adjacent in SrcR: whichever one starts
    // lower must end exactly where the other one begins.
    if (Pos <= P && (Pos + W-Z) != P)
      continue;
    if (P < Pos && (P + Z) != Pos)
      continue;
    // The starting bitfield position must be at a subregister boundary.
    unsigned Low = std::min(P, Pos);
    if (Low != 0 && Low != 32)
      continue;

    // The remaining non-zero bits of S must be consecutive bits of SrcR.
    unsigned I;
    for (I = 1; I < Z; ++I) {
      const BitTracker::BitValue &V = SC[I];
      if (V.Type != BitTracker::BitValue::Ref)
        break;
      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
        break;
    }
    if (I != Z)
      continue;

    // Generate bitsplit where S is defined.
    CountBitSplit++;
    MachineInstr *DefS = MRI.getVRegDef(S);
    assert(DefS != nullptr);
    DebugLoc DL = DefS->getDebugLoc();
    MachineBasicBlock &B = *DefS->getParent();
    // The insertion point is in DefS's block, so it is DefS (not MI) that
    // decides whether the PHI group has to be skipped: a non-PHI instruction
    // must not be placed among PHIs, and when DefS is not a PHI the new
    // instruction must not be moved above it.
    auto At = DefS->isPHI() ? B.getFirstNonPHI()
                            : MachineBasicBlock::iterator(DefS);
    // For a 64-bit source, split only the half that holds both bitfields.
    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
      SrcSR = (Low == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
    unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    // The immediate is the width of the lower bitfield.
    BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
      .addReg(SrcR, 0, SrcSR)
      .addImm(Pos <= P ? W-Z : Z);
    // The register holding the lower bitfield becomes the low half of the
    // result, the other one becomes the high half.
    if (Pos <= P) {
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_hi, MRI);
    } else {
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
    }
    return true;
  }

  return false;
}
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
// vreg2 = S2_lsr_i_r vreg1, 5
@ -2451,7 +2567,8 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
}
if (FRC->getID() == Hexagon::IntRegsRegClassID) {
bool T = simplifyExtractLow(MI, RD, RC, AVB);
bool T = genBitSplit(MI, RD, RC, AVB);
T = T || simplifyExtractLow(MI, RD, RC, AVB);
T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);

View File

@ -0,0 +1,35 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; REQUIRES: asserts
; This used to crash. Check for some sane output.
; CHECK: call printf
target triple = "hexagon"
@g0 = external local_unnamed_addr global [4 x i64], align 8
@g1 = external hidden unnamed_addr constant [29 x i8], align 1
@g2 = external hidden unnamed_addr constant [29 x i8], align 1
; Loads one i64 from @g0, extracts several bitfields from it and passes two
; derived values to printf.
define void @fred() local_unnamed_addr #0 {
b0:
%v1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @g0, i32 0, i32 0), align 8
; %v2 = bits 0..31 of the loaded value.
%v2 = trunc i64 %v1 to i32
; %v4 = bits 16..47 of the loaded value.
%v3 = lshr i64 %v1, 16
%v4 = trunc i64 %v3 to i32
; %v5 = bits 16..23 of the loaded value.
%v5 = and i32 %v4, 255
; The two additions of 0 leave the value unchanged.
%v6 = add nuw nsw i32 0, %v5
%v7 = add nuw nsw i32 %v6, 0
%v8 = zext i32 %v7 to i64
; %v9 = bits 0..15 and %v10 = bits 16..31 of the loaded value: two adjacent
; 16-bit fields taken from the same source.
%v9 = and i32 %v2, 65535
%v10 = and i32 %v4, 65535
%v11 = add nuw nsw i32 %v10, %v9
%v12 = zext i32 %v11 to i64
tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @g1, i32 0, i32 0), i64 %v8) #0
tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @g2, i32 0, i32 0), i64 %v12) #0
ret void
}
; Function Attrs: nounwind
declare void @printf(i8* nocapture readonly, ...) local_unnamed_addr #0
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }

View File

@ -0,0 +1,17 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: bitsplit(r{{[0-9]+}},#5)
target triple = "hexagon"
; Returns bit (%a & 31) of the word b[%a >> 5].
define i32 @fred(i32 %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
entry:
; %and is the low 5 bits of %a and %shr is the remaining upper bits of %a.
; These are the two values the CHECK line expects to come from a single
; bitsplit with immediate 5.
%and = and i32 %a, 31
%shr = lshr i32 %a, 5
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %shr
%0 = load i32, i32* %arrayidx, align 4
%shr1 = lshr i32 %0, %and
%and2 = and i32 %shr1, 1
ret i32 %and2
}
attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" }