diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index a155762cd4a7..28784f0aa31e 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -48,10 +48,15 @@ static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
   cl::init(true), cl::desc("Preserve subregisters in tied operands"));
 static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
   cl::init(true), cl::desc("Generate extract instructions"));
+static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
+  cl::init(true), cl::desc("Generate bitsplit instructions"));
 
 static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
   cl::init(UINT_MAX));
 static unsigned CountExtract = 0;
+static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
+  cl::init(UINT_MAX));
+static unsigned CountBitSplit = 0;
 
 namespace llvm {
 
@@ -1769,6 +1774,8 @@ namespace {
           const BitTracker::RegisterCell &RC);
     bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
           const BitTracker::RegisterCell &RC);
+    bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
+          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
     bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
           const BitTracker::RegisterCell &RC);
     bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
@@ -2155,6 +2162,129 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
   return false;
 }
 
+/// Try to replace two 32-bit registers that hold adjacent bitfields of the
+/// same source register with a single A4_bitspliti. RC (the cell of RD) must
+/// be a run of consecutive bits of one register followed by Z leading zeros
+/// (0 < Z < 32); the partner register S is searched for among AVs and must
+/// hold the complementary run of Z bits, padded with 32-Z leading zeros. The
+/// lower of the two fields must start at bit 0 or bit 32 of the source. On
+/// success the new instruction is inserted where S is defined, RD.Reg and S
+/// are replaced with the isub_lo/isub_hi halves of its result, and true is
+/// returned. Returns false if nothing was changed.
+bool BitSimplification::genBitSplit(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+      const RegisterSet &AVs) {
+  if (!GenBitSplit)
+    return false;
+  if (CountBitSplit >= MaxBitSplit)
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case Hexagon::A4_bitsplit:
+    case Hexagon::A4_bitspliti:
+      return false;
+  }
+
+  unsigned W = RC.width();
+  if (W != 32)
+    return false;
+
+  // Count the consecutive most-significant bits of C for which is(0) holds.
+  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
+    unsigned Z = C.width();
+    while (Z > 0 && C[Z-1].is(0))
+      --Z;
+    return C.width() - Z;
+  };
+
+  // Count the number of leading zeros in the target RC.
+  unsigned Z = ctlz(RC);
+  if (Z == 0 || Z == W)
+    return false;
+
+  // A simplistic analysis: assume the source register (the one being split)
+  // is fully unknown, and that all its bits are self-references.
+  const BitTracker::BitValue &B0 = RC[0];
+  if (B0.Type != BitTracker::BitValue::Ref)
+    return false;
+
+  unsigned SrcR = B0.RefI.Reg;
+  unsigned SrcSR = 0;
+  unsigned Pos = B0.RefI.Pos;
+
+  // All the non-zero bits should be consecutive bits from the same register.
+  for (unsigned i = 1; i < W-Z; ++i) {
+    const BitTracker::BitValue &V = RC[i];
+    if (V.Type != BitTracker::BitValue::Ref)
+      return false;
+    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
+      return false;
+  }
+
+  // Now, find the other bitfield among AVs.
+  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
+    // The number of leading zeros here should be the number of trailing
+    // non-zeros in RC.
+    // NOTE(review): assumes BT has a cell for every register in AVs --
+    // confirm before relying on lookup() here.
+    const BitTracker::RegisterCell &SC = BT.lookup(S);
+    if (SC.width() != W || ctlz(SC) != W-Z)
+      continue;
+    // The Z lower bits should now match SrcR.
+    const BitTracker::BitValue &S0 = SC[0];
+    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
+      continue;
+    unsigned P = S0.RefI.Pos;
+
+    if (Pos <= P && (Pos + W-Z) != P)
+      continue;
+    if (P < Pos && (P + Z) != Pos)
+      continue;
+    // The starting bitfield position must be at a subregister boundary.
+    if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
+      continue;
+
+    unsigned I;
+    for (I = 1; I < Z; ++I) {
+      const BitTracker::BitValue &V = SC[I];
+      if (V.Type != BitTracker::BitValue::Ref)
+        break;
+      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
+        break;
+    }
+    if (I != Z)
+      continue;
+
+    // Generate bitsplit where S is defined.
+    CountBitSplit++;
+    MachineInstr *DefS = MRI.getVRegDef(S);
+    assert(DefS != nullptr);
+    DebugLoc DL = DefS->getDebugLoc();
+    MachineBasicBlock &B = *DefS->getParent();
+    // DefS, not MI, determines the insertion point: the new instruction goes
+    // into DefS's block, so a PHI def needs B.getFirstNonPHI() rather than
+    // the position of DefS itself.
+    auto At = DefS->isPHI() ? B.getFirstNonPHI()
+                            : MachineBasicBlock::iterator(DefS);
+    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
+      SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
+    unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+    BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
+      .addReg(SrcR, 0, SrcSR)
+      .addImm(Pos <= P ? W-Z : Z);
+    if (Pos <= P) {
+      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
+      HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
+    } else {
+      HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
+      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
+    }
+    return true;
+  }
+
+  return false;
+}
+
 // Check for tstbit simplification opportunity, where the bit being checked
 // can be tracked back to another register. For example:
 //   vreg2 = S2_lsr_i_r  vreg1, 5
@@ -2451,7 +2581,8 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
     }
 
     if (FRC->getID() == Hexagon::IntRegsRegClassID) {
-      bool T = simplifyExtractLow(MI, RD, RC, AVB);
+      bool T = genBitSplit(MI, RD, RC, AVB);
+      T = T || simplifyExtractLow(MI, RD, RC, AVB);
       T = T || genExtractHalf(MI, RD, RC);
       T = T || genCombineHalf(MI, RD, RC);
       T = T || genExtractLow(MI, RD, RC);
diff --git a/llvm/test/CodeGen/Hexagon/bit-bitsplit-src.ll b/llvm/test/CodeGen/Hexagon/bit-bitsplit-src.ll
new file mode 100644
index 000000000000..2d1c71c709f4
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bit-bitsplit-src.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; REQUIRES: asserts
+
+; This used to crash. Check for some sane output.
+; CHECK: call printf
+
+target triple = "hexagon"
+
+@g0 = external local_unnamed_addr global [4 x i64], align 8
+@g1 = external hidden unnamed_addr constant [29 x i8], align 1
+@g2 = external hidden unnamed_addr constant [29 x i8], align 1
+
+define void @fred() local_unnamed_addr #0 {
+b0:
+  %v1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @g0, i32 0, i32 0), align 8
+  %v2 = trunc i64 %v1 to i32
+  %v3 = lshr i64 %v1, 16
+  %v4 = trunc i64 %v3 to i32
+  %v5 = and i32 %v4, 255
+  %v6 = add nuw nsw i32 0, %v5
+  %v7 = add nuw nsw i32 %v6, 0
+  %v8 = zext i32 %v7 to i64
+  %v9 = and i32 %v2, 65535
+  %v10 = and i32 %v4, 65535
+  %v11 = add nuw nsw i32 %v10, %v9
+  %v12 = zext i32 %v11 to i64
+  tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @g1, i32 0, i32 0), i64 %v8) #0
+  tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @g2, i32 0, i32 0), i64 %v12) #0
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @printf(i8* nocapture readonly, ...) local_unnamed_addr #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }
diff --git a/llvm/test/CodeGen/Hexagon/bit-bitsplit.ll b/llvm/test/CodeGen/Hexagon/bit-bitsplit.ll
new file mode 100644
index 000000000000..4ae2e4e66508
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bit-bitsplit.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: bitsplit(r{{[0-9]+}},#5)
+
+target triple = "hexagon"
+
+define i32 @fred(i32 %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  %and = and i32 %a, 31
+  %shr = lshr i32 %a, 5
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %shr
+  %0 = load i32, i32* %arrayidx, align 4
+  %shr1 = lshr i32 %0, %and
+  %and2 = and i32 %shr1, 1
+  ret i32 %and2
+}
+
+attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" }