forked from OSchip/llvm-project
parent
c3f36af8d0
commit
beaca19c7c
|
@ -58,7 +58,7 @@ A "little endian" layout has the least significant byte first (lowest in memory
|
|||
Big endian vector load using ``LDR``.
|
||||
|
||||
|
||||
A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - ``LDR q0, [foo]``. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero'th item as layed out in memory becomes the n'th lane in the vector.
|
||||
A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - ``LDR q0, [foo]``. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero'th item as laid out in memory becomes the n'th lane in the vector.
|
||||
|
||||
.. figure:: ARM-BE-ld1.png
|
||||
:align: right
|
||||
|
|
|
@ -97,7 +97,7 @@ Implementation: Getting from mass and scale to frequency
|
|||
========================================================
|
||||
|
||||
After analysing the complete series of DAGs, each block has a mass (local to
|
||||
its containing loop, if any), and each loop psuedo-node has a loop scale and
|
||||
its containing loop, if any), and each loop pseudo-node has a loop scale and
|
||||
its own mass (from its parent's DAG).
|
||||
|
||||
We can get an initial frequency assignment (with entry frequency of 1.0) by
|
||||
|
|
|
@ -178,7 +178,7 @@ in the following fashion:
|
|||
bl __chkstk
|
||||
sub.w sp, sp, r4
|
||||
|
||||
However, this has the limitation of 32 MiB (±16MiB). In order to accomodate
|
||||
However, this has the limitation of 32 MiB (±16MiB). In order to accommodate
|
||||
larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 4GiB
|
||||
range via a slight deviation. It will generate an indirect jump as follows:
|
||||
|
||||
|
|
|
@ -316,7 +316,7 @@ public:
|
|||
/// 2) This SCC will be the parent of any new SCCs. Thus, this SCC is
|
||||
/// preserved as the root of any new SCC directed graph formed.
|
||||
/// 3) No SCC other than this SCC has its member set changed (this is
|
||||
/// inherent in the definiton of removing such an edge).
|
||||
/// inherent in the definition of removing such an edge).
|
||||
/// 4) All of the parent links of the SCC graph will be updated to reflect
|
||||
/// the new SCC structure.
|
||||
/// 5) All SCCs formed out of this SCC, excluding this SCC, will be
|
||||
|
|
|
@ -423,7 +423,7 @@ void LazyCallGraph::SCC::internalDFS(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Track the lowest link of the childen, if any are still in the stack.
|
||||
// Track the lowest link of the children, if any are still in the stack.
|
||||
// Any child not on the stack will have a LowLink of -1.
|
||||
assert(ChildN.LowLink != 0 &&
|
||||
"Low-link must not be zero with a non-zero DFS number.");
|
||||
|
@ -654,7 +654,7 @@ LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Track the lowest link of the childen, if any are still in the stack.
|
||||
// Track the lowest link of the children, if any are still in the stack.
|
||||
assert(ChildN.LowLink != 0 &&
|
||||
"Low-link must not be zero with a non-zero DFS number.");
|
||||
if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
|
||||
|
|
|
@ -702,10 +702,10 @@ void ScheduleDAGInstrs::initSUnits() {
|
|||
|
||||
// If this SUnit uses a reserved or unbuffered resource, mark it as such.
|
||||
//
|
||||
// Reserved resources block an instruction from issueing and stall the
|
||||
// Reserved resources block an instruction from issuing and stall the
|
||||
// entire pipeline. These are identified by BufferSize=0.
|
||||
//
|
||||
// Unbuffered resources prevent execution of subsequeny instructions that
|
||||
// Unbuffered resources prevent execution of subsequent instructions that
|
||||
// require the same resources. This is used for in-order execution pipelines
|
||||
// within an out-of-order core. These are identified by BufferSize=1.
|
||||
if (SchedModel.hasInstrSchedModel()) {
|
||||
|
|
|
@ -789,7 +789,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
|
|||
case COFF::IMAGE_REL_ARM_MOV32A:
|
||||
// IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, IMAGE_REL_ARM_MOV32A are
|
||||
// only used for ARM mode code, which is documented as being unsupported
|
||||
// by Windows on ARM. Emperical proof indicates that masm is able to
|
||||
// by Windows on ARM. Empirical proof indicates that masm is able to
|
||||
// generate the relocations however the rest of the MSVC toolchain is
|
||||
// unable to handle it.
|
||||
llvm_unreachable("unsupported relocation");
|
||||
|
|
|
@ -163,7 +163,7 @@ void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
|
|||
return;
|
||||
}
|
||||
|
||||
// Otherwise SBFX/UBFX is the prefered form
|
||||
// Otherwise SBFX/UBFX is the preferred form
|
||||
O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
|
||||
<< getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
|
||||
<< ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
|
||||
|
@ -190,7 +190,7 @@ void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
|
|||
|
||||
int LSB = ImmR;
|
||||
int Width = ImmS - ImmR + 1;
|
||||
// Otherwise BFXIL the prefered form
|
||||
// Otherwise BFXIL is the preferred form
|
||||
O << "\tbfxil\t"
|
||||
<< getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
|
||||
<< ", #" << LSB << ", #" << Width;
|
||||
|
|
|
@ -7410,7 +7410,7 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
|
|||
// v4f32 or when copying a member from one v4f32 to another.
|
||||
// We also use it for transferring i32 from one register to another,
|
||||
// since it simply copies the same bits.
|
||||
// If we're transfering an i32 from memory to a specific element in a
|
||||
// If we're transferring an i32 from memory to a specific element in a
|
||||
// register, we output a generic DAG that will match the PINSRD
|
||||
// instruction.
|
||||
// TODO: Optimize for AVX cases too (VINSERTPS)
|
||||
|
|
|
@ -107,7 +107,7 @@ namespace {
|
|||
/// In real programs, a GEP index may be more complicated than a simple addition
|
||||
/// of something and a constant integer which can be trivially split. For
|
||||
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
|
||||
/// constant offset, so that we can seperate the index to (a << 3) + b and 5.
|
||||
/// constant offset, so that we can separate the index to (a << 3) + b and 5.
|
||||
///
|
||||
/// Therefore, this class looks into the expression that computes a given GEP
|
||||
/// index, and tries to find a constant integer that can be hoisted to the
|
||||
|
@ -179,7 +179,7 @@ class ConstantOffsetExtractor {
|
|||
};
|
||||
|
||||
/// \brief A pass that tries to split every GEP in the function into a variadic
|
||||
/// base and a constant offset. It is a FuntionPass because searching for the
|
||||
/// base and a constant offset. It is a FunctionPass because searching for the
|
||||
/// constant offset may inspect other basic blocks.
|
||||
class SeparateConstOffsetFromGEP : public FunctionPass {
|
||||
public:
|
||||
|
|
|
@ -12,8 +12,8 @@ entry:
|
|||
; Mass = 1
|
||||
; Backedge mass = 1/3, exit mass = 2/3
|
||||
; Loop scale = 3/2
|
||||
; Psuedo-edges = exit
|
||||
; Psuedo-mass = 1
|
||||
; Pseudo-edges = exit
|
||||
; Pseudo-mass = 1
|
||||
; Frequency = 1*3/2*1 = 3/2
|
||||
; CHECK-NEXT: outer: float = 1.5,
|
||||
outer:
|
||||
|
@ -86,7 +86,7 @@ entry:
|
|||
; Backedge mass = 1/2, exit mass = 1/2
|
||||
; Loop scale = 2
|
||||
; Pseudo-edges = exit
|
||||
; Psuedo-mass = 1
|
||||
; Pseudo-mass = 1
|
||||
; Frequency = 1*2*1 = 2
|
||||
; CHECK-NEXT: outer: float = 2.0,
|
||||
outer:
|
||||
|
@ -98,8 +98,8 @@ outer:
|
|||
; Mass = 1
|
||||
; Backedge mass = 1/3, exit mass = 2/3
|
||||
; Loop scale = 3/2
|
||||
; Psuedo-edges = outer.inc
|
||||
; Psuedo-mass = 1/2
|
||||
; Pseudo-edges = outer.inc
|
||||
; Pseudo-mass = 1/2
|
||||
; Frequency = 2*1*3/2*1/2 = 3/2
|
||||
; CHECK-NEXT: middle: float = 1.5,
|
||||
middle:
|
||||
|
|
|
@ -867,7 +867,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
|
|||
}
|
||||
} else {
|
||||
// Assume all printable operands are desired for now. This can be
|
||||
// overridden in the InstAlias instantiation if neccessary.
|
||||
// overridden in the InstAlias instantiation if necessary.
|
||||
IAP->addOperand(ROName, i, PrintMethodIdx);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue