forked from OSchip/llvm-project
[AArch64] Improve orr+movk sequences for MOVi64imm.
The existing code has three different ways to try to lower a 64-bit immediate to the sequence ORR+MOVK. The result is messy: it misses some possible sequences, and the order of the checks means we sometimes emit two MOVKs when we only need one. Instead, just use a simple loop to try all possible two-instruction ORR+MOVK sequences. Differential Revision: https://reviews.llvm.org/D47176 llvm-svn: 333218
This commit is contained in:
parent
7d60b9052a
commit
9e177882aa
|
@ -66,6 +66,11 @@ private:
|
||||||
MachineBasicBlock::iterator &NextMBBI);
|
MachineBasicBlock::iterator &NextMBBI);
|
||||||
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||||
unsigned BitSize);
|
unsigned BitSize);
|
||||||
|
bool expandMOVImmSimple(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MBBI,
|
||||||
|
unsigned BitSize,
|
||||||
|
unsigned OneChunks,
|
||||||
|
unsigned ZeroChunks);
|
||||||
|
|
||||||
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||||
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
|
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
|
||||||
|
@ -107,57 +112,6 @@ static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
|
||||||
return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
|
return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function which replicates a 16-bit chunk within a 64-bit
|
|
||||||
/// value. Indices correspond to element numbers in a v4i16.
|
|
||||||
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
|
|
||||||
assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
|
|
||||||
const unsigned ShiftAmt = ToIdx * 16;
|
|
||||||
|
|
||||||
// Replicate the source chunk to the destination position.
|
|
||||||
const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
|
|
||||||
// Clear the destination chunk.
|
|
||||||
Imm &= ~(0xFFFFLL << ShiftAmt);
|
|
||||||
// Insert the replicated chunk.
|
|
||||||
return Imm | Chunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper function which tries to materialize a 64-bit value with an
|
|
||||||
/// ORR + MOVK instruction sequence.
|
|
||||||
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
|
|
||||||
MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator &MBBI,
|
|
||||||
const AArch64InstrInfo *TII, unsigned ChunkIdx) {
|
|
||||||
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
|
|
||||||
const unsigned ShiftAmt = ChunkIdx * 16;
|
|
||||||
|
|
||||||
uint64_t Encoding;
|
|
||||||
if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
|
|
||||||
// Create the ORR-immediate instruction.
|
|
||||||
MachineInstrBuilder MIB =
|
|
||||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
|
|
||||||
.add(MI.getOperand(0))
|
|
||||||
.addReg(AArch64::XZR)
|
|
||||||
.addImm(Encoding);
|
|
||||||
|
|
||||||
// Create the MOVK instruction.
|
|
||||||
const unsigned Imm16 = getChunk(UImm, ChunkIdx);
|
|
||||||
const unsigned DstReg = MI.getOperand(0).getReg();
|
|
||||||
const bool DstIsDead = MI.getOperand(0).isDead();
|
|
||||||
MachineInstrBuilder MIB1 =
|
|
||||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
|
|
||||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
|
||||||
.addReg(DstReg)
|
|
||||||
.addImm(Imm16)
|
|
||||||
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
|
|
||||||
|
|
||||||
transferImpOps(MI, MIB, MIB1);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the given 16-bit chunk replicated to full 64-bit width
|
/// Check whether the given 16-bit chunk replicated to full 64-bit width
|
||||||
/// can be materialized with an ORR instruction.
|
/// can be materialized with an ORR instruction.
|
||||||
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
|
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
|
||||||
|
@ -440,7 +394,22 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try a MOVI instruction (aka ORR-immediate with the zero register).
|
// Scan the immediate and count the number of 16-bit chunks which are either
|
||||||
|
// all ones or all zeros.
|
||||||
|
unsigned OneChunks = 0;
|
||||||
|
unsigned ZeroChunks = 0;
|
||||||
|
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
|
||||||
|
const unsigned Chunk = (Imm >> Shift) & Mask;
|
||||||
|
if (Chunk == Mask)
|
||||||
|
OneChunks++;
|
||||||
|
else if (Chunk == 0)
|
||||||
|
ZeroChunks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Prefer MOVZ/MOVN over ORR because of the rules for the "mov"
|
||||||
|
// alias.
|
||||||
|
|
||||||
|
// Try a single ORR.
|
||||||
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
|
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
|
||||||
uint64_t Encoding;
|
uint64_t Encoding;
|
||||||
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
|
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
|
||||||
|
@ -455,74 +424,69 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan the immediate and count the number of 16-bit chunks which are either
|
// Two instruction sequences.
|
||||||
// all ones or all zeros.
|
//
|
||||||
unsigned OneChunks = 0;
|
// Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
|
||||||
unsigned ZeroChunks = 0;
|
// fastest sequence with fast literal generation.
|
||||||
|
if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2)
|
||||||
|
return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
|
||||||
|
|
||||||
|
assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
|
||||||
|
"MOVZ/MOVK pair");
|
||||||
|
|
||||||
|
// Try other two-instruction sequences.
|
||||||
|
|
||||||
|
// 64-bit ORR followed by MOVK.
|
||||||
|
// We try to construct the ORR immediate in three different ways: either we
|
||||||
|
// zero out the chunk which will be replaced, we fill the chunk which will
|
||||||
|
// be replaced with ones, or we take the bit pattern from the other half of
|
||||||
|
// the 64-bit immediate. This is comprehensive because of the way ORR
|
||||||
|
// immediates are constructed.
|
||||||
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
|
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
|
||||||
const unsigned Chunk = (Imm >> Shift) & Mask;
|
uint64_t ShiftedMask = (0xFFFFULL << Shift);
|
||||||
if (Chunk == Mask)
|
uint64_t ZeroChunk = UImm & ~ShiftedMask;
|
||||||
OneChunks++;
|
uint64_t OneChunk = UImm | ShiftedMask;
|
||||||
else if (Chunk == 0)
|
uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
|
||||||
ZeroChunks++;
|
uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
|
||||||
}
|
if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
|
||||||
|
AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
|
||||||
|
AArch64_AM::processLogicalImmediate(ReplicateChunk,
|
||||||
|
BitSize, Encoding)) {
|
||||||
|
// Create the ORR-immediate instruction.
|
||||||
|
MachineInstrBuilder MIB =
|
||||||
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
|
||||||
|
.add(MI.getOperand(0))
|
||||||
|
.addReg(AArch64::XZR)
|
||||||
|
.addImm(Encoding);
|
||||||
|
|
||||||
// Since we can't materialize the constant with a single ORR instruction,
|
// Create the MOVK instruction.
|
||||||
// let's see whether we can materialize 3/4 of the constant with an ORR
|
const unsigned Imm16 = getChunk(UImm, Shift / 16);
|
||||||
// instruction and use an additional MOVK instruction to materialize the
|
const unsigned DstReg = MI.getOperand(0).getReg();
|
||||||
// remaining 1/4.
|
const bool DstIsDead = MI.getOperand(0).isDead();
|
||||||
//
|
MachineInstrBuilder MIB1 =
|
||||||
// We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
|
||||||
//
|
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||||
// E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
|
.addReg(DstReg)
|
||||||
// we would create the following instruction sequence:
|
.addImm(Imm16)
|
||||||
//
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
|
||||||
// ORR x0, xzr, |A|X|A|X|
|
|
||||||
// MOVK x0, |B|, LSL #16
|
|
||||||
//
|
|
||||||
// Only look at 64-bit constants which can't be materialized with a single
|
|
||||||
// instruction e.g. which have less than either three all zero or all one
|
|
||||||
// chunks.
|
|
||||||
//
|
|
||||||
// Ignore 32-bit constants here, they always can be materialized with a
|
|
||||||
// MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
|
|
||||||
// with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
|
|
||||||
// Thus we fall back to the default code below which in the best case creates
|
|
||||||
// a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
|
|
||||||
//
|
|
||||||
if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
|
|
||||||
// If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
|
|
||||||
// identical?
|
|
||||||
if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
|
|
||||||
// See if we can come up with a constant which can be materialized with
|
|
||||||
// ORR-immediate by replicating element 3 into element 1.
|
|
||||||
uint64_t OrrImm = replicateChunk(UImm, 3, 1);
|
|
||||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// See if we can come up with a constant which can be materialized with
|
transferImpOps(MI, MIB, MIB1);
|
||||||
// ORR-immediate by replicating element 1 into element 3.
|
MI.eraseFromParent();
|
||||||
OrrImm = replicateChunk(UImm, 1, 3);
|
return true;
|
||||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
|
|
||||||
// identical?
|
|
||||||
} else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
|
|
||||||
// See if we can come up with a constant which can be materialized with
|
|
||||||
// ORR-immediate by replicating element 2 into element 0.
|
|
||||||
uint64_t OrrImm = replicateChunk(UImm, 2, 0);
|
|
||||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// See if we can come up with a constant which can be materialized with
|
|
||||||
// ORR-immediate by replicating element 0 into element 2.
|
|
||||||
OrrImm = replicateChunk(UImm, 0, 2);
|
|
||||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Add more two-instruction sequences.
|
||||||
|
|
||||||
|
// Three instruction sequences.
|
||||||
|
//
|
||||||
|
// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
|
||||||
|
// the fastest sequence with fast literal generation. (If neither MOVK is
|
||||||
|
// part of a fast literal generation pair, it could be slower than the
|
||||||
|
// four-instruction sequence, but we won't worry about that for now.)
|
||||||
|
if (OneChunks || ZeroChunks)
|
||||||
|
return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
|
||||||
|
|
||||||
// Check for identical 16-bit chunks within the constant and if so materialize
|
// Check for identical 16-bit chunks within the constant and if so materialize
|
||||||
// them with a single ORR instruction. The remaining one or two 16-bit chunks
|
// them with a single ORR instruction. The remaining one or two 16-bit chunks
|
||||||
// will be materialized with MOVK instructions.
|
// will be materialized with MOVK instructions.
|
||||||
|
@ -537,6 +501,23 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
|
||||||
if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
|
if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
// We found no possible two or three instruction sequence; use the general
|
||||||
|
// four-instruction sequence.
|
||||||
|
return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
|
||||||
|
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
|
||||||
|
bool AArch64ExpandPseudo::expandMOVImmSimple(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MBBI,
|
||||||
|
unsigned BitSize,
|
||||||
|
unsigned OneChunks,
|
||||||
|
unsigned ZeroChunks) {
|
||||||
|
MachineInstr &MI = *MBBI;
|
||||||
|
unsigned DstReg = MI.getOperand(0).getReg();
|
||||||
|
uint64_t Imm = MI.getOperand(1).getImm();
|
||||||
|
const unsigned Mask = 0xFFFF;
|
||||||
|
|
||||||
// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
|
// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
|
||||||
// more MOVK instructions to insert additional 16-bit portions into the
|
// more MOVK instructions to insert additional 16-bit portions into the
|
||||||
// lower bits.
|
// lower bits.
|
||||||
|
|
|
@ -134,18 +134,6 @@ define i64 @mvn_lsl_pattern() nounwind {
|
||||||
ret i64 -279156097024
|
ret i64 -279156097024
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: prefer "mov w0, #-63; movk x0, #31, lsl #32"
|
|
||||||
; or "mov x0, #137438887936; movk x0, #65473"
|
|
||||||
define i64 @mvn32_pattern() nounwind {
|
|
||||||
; CHECK-LABEL: mvn32_pattern:
|
|
||||||
; CHECK: // %bb.0:
|
|
||||||
; CHECK-NEXT: mov x0, #65473
|
|
||||||
; CHECK-NEXT: movk x0, #65535, lsl #16
|
|
||||||
; CHECK-NEXT: movk x0, #31, lsl #32
|
|
||||||
; CHECK-NEXT: ret
|
|
||||||
ret i64 137438953409
|
|
||||||
}
|
|
||||||
|
|
||||||
; FIXME: prefer "mov w0, #-63; movk x0, #17, lsl #32"
|
; FIXME: prefer "mov w0, #-63; movk x0, #17, lsl #32"
|
||||||
define i64 @mvn32_pattern_2() nounwind {
|
define i64 @mvn32_pattern_2() nounwind {
|
||||||
; CHECK-LABEL: mvn32_pattern_2:
|
; CHECK-LABEL: mvn32_pattern_2:
|
||||||
|
@ -281,9 +269,9 @@ define i64 @orr_movk10() nounwind {
|
||||||
define i64 @orr_movk11() nounwind {
|
define i64 @orr_movk11() nounwind {
|
||||||
; CHECK-LABEL: orr_movk11:
|
; CHECK-LABEL: orr_movk11:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x0, #-4503599627370241
|
; CHECK-NEXT: mov x0, #-65281
|
||||||
; CHECK-NEXT: movk x0, #57005, lsl #16
|
; CHECK-NEXT: movk x0, #57005, lsl #16
|
||||||
; CHECK-NEXT: movk x0, #65535, lsl #32
|
; CHECK-NEXT: movk x0, #65520, lsl #48
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
ret i64 -4222125209747201
|
ret i64 -4222125209747201
|
||||||
}
|
}
|
||||||
|
@ -318,24 +306,20 @@ entry:
|
||||||
ret i64 -281474976710654
|
ret i64 -281474976710654
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: prefer "mov x0, #-549755813888; movk x0, #2048, lsl #16"
|
|
||||||
define i64 @orr_movk14() nounwind {
|
define i64 @orr_movk14() nounwind {
|
||||||
; CHECK-LABEL: orr_movk14:
|
; CHECK-LABEL: orr_movk14:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x0, #134217728
|
; CHECK-NEXT: mov x0, #-549755813888
|
||||||
; CHECK-NEXT: movk x0, #65408, lsl #32
|
; CHECK-NEXT: movk x0, #2048, lsl #16
|
||||||
; CHECK-NEXT: movk x0, #65535, lsl #48
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
ret i64 -549621596160
|
ret i64 -549621596160
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: prefer "mov x0, #549755813887; movk x0, #63487, lsl #16"
|
|
||||||
define i64 @orr_movk15() nounwind {
|
define i64 @orr_movk15() nounwind {
|
||||||
; CHECK-LABEL: orr_movk15:
|
; CHECK-LABEL: orr_movk15:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x0, #65535
|
; CHECK-NEXT: mov x0, #549755813887
|
||||||
; CHECK-NEXT: movk x0, #63487, lsl #16
|
; CHECK-NEXT: movk x0, #63487, lsl #16
|
||||||
; CHECK-NEXT: movk x0, #127, lsl #32
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
ret i64 549621596159
|
ret i64 549621596159
|
||||||
}
|
}
|
||||||
|
@ -351,24 +335,121 @@ define i64 @orr_movk16() nounwind {
|
||||||
ret i64 36028661727494142
|
ret i64 36028661727494142
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: prefer "mov x0, #-1099511627776; movk x0, #65280, lsl #16"
|
|
||||||
define i64 @orr_movk17() nounwind {
|
define i64 @orr_movk17() nounwind {
|
||||||
; CHECK-LABEL: orr_movk17:
|
; CHECK-LABEL: orr_movk17:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x0, #-71777214294589696
|
; CHECK-NEXT: mov x0, #-1099511627776
|
||||||
; CHECK-NEXT: movk x0, #0
|
; CHECK-NEXT: movk x0, #65280, lsl #16
|
||||||
; CHECK-NEXT: movk x0, #65535, lsl #48
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
ret i64 -1095233437696
|
ret i64 -1095233437696
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: prefer "mov x0, #72340172838076673; and x0, x0, #2199023255296"
|
|
||||||
define i64 @orr_movk18() nounwind {
|
define i64 @orr_movk18() nounwind {
|
||||||
; CHECK-LABEL: orr_movk18:
|
; CHECK-LABEL: orr_movk18:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x0, #72340172838076673
|
; CHECK-NEXT: mov x0, #137438887936
|
||||||
; CHECK-NEXT: movk x0, #256
|
; CHECK-NEXT: movk x0, #65473
|
||||||
; CHECK-NEXT: movk x0, #0, lsl #48
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 137438953409
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #72340172838076673; and x0, x0, #2199023255296"
|
||||||
|
define i64 @orr_and() nounwind {
|
||||||
|
; CHECK-LABEL: orr_and:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #256
|
||||||
|
; CHECK-NEXT: movk x0, #257, lsl #16
|
||||||
|
; CHECK-NEXT: movk x0, #257, lsl #32
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
ret i64 1103823438080
|
ret i64 1103823438080
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov w0, #-1431655766; movk x0, #9, lsl #32"
|
||||||
|
define i64 @movn_movk() nounwind {
|
||||||
|
; CHECK-LABEL: movn_movk:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #43690
|
||||||
|
; CHECK-NEXT: movk x0, #43690, lsl #16
|
||||||
|
; CHECK-NEXT: movk x0, #9, lsl #32
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 41518017194
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov w0, #-51847; orr x0, x0, #0x1111111111111111"
|
||||||
|
define i64 @movn_orr() nounwind {
|
||||||
|
; CHECK-LABEL: movn_orr:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #-51847
|
||||||
|
; CHECK-NEXT: movk x0, #4369, lsl #32
|
||||||
|
; CHECK-NEXT: movk x0, #4369, lsl #48
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 1229782942255887737
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov w0, #305463295; eor x0, x0, #0x3333333333333333"
|
||||||
|
define i64 @movn_eor() nounwind {
|
||||||
|
; CHECK-LABEL: movn_eor:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #3689348814741910323
|
||||||
|
; CHECK-NEXT: movk x0, #52428
|
||||||
|
; CHECK-NEXT: movk x0, #8455, lsl #16
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 3689348814437076172
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #536866816; orr x0, x0, #0x3fff800000000000"
|
||||||
|
define i64 @orr_orr_64() nounwind {
|
||||||
|
; CHECK-LABEL: orr_orr_64:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #4611545280939032576
|
||||||
|
; CHECK-NEXT: movk x0, #61440
|
||||||
|
; CHECK-NEXT: movk x0, #8191, lsl #16
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 4611545281475899392
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #558551907040256; orr x0, x0, #0x1000100010001000"
|
||||||
|
define i64 @orr_orr_32() nounwind {
|
||||||
|
; CHECK-LABEL: orr_orr_32:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #-287953294993589248
|
||||||
|
; CHECK-NEXT: movk x0, #7169, lsl #16
|
||||||
|
; CHECK-NEXT: movk x0, #7169, lsl #48
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 2018171185438784512
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #281479271743489; orr x0, x0, #0x1000100010001000"
|
||||||
|
define i64 @orr_orr_16() nounwind {
|
||||||
|
; CHECK-LABEL: orr_orr_16:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #4097
|
||||||
|
; CHECK-NEXT: movk x0, #4097, lsl #16
|
||||||
|
; CHECK-NEXT: movk x0, #4097, lsl #32
|
||||||
|
; CHECK-NEXT: movk x0, #4097, lsl #48
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 1153220576333074433
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #144680345676153346; orr x0, x0, #0x1818181818181818"
|
||||||
|
define i64 @orr_orr_8() nounwind {
|
||||||
|
; CHECK-LABEL: orr_orr_8:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #6682
|
||||||
|
; CHECK-NEXT: movk x0, #6682, lsl #16
|
||||||
|
; CHECK-NEXT: movk x0, #6682, lsl #32
|
||||||
|
; CHECK-NEXT: movk x0, #6682, lsl #48
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 1880844493789993498
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: prefer "mov x0, #-6148914691236517206; orr x0, x0, #0x0FFFFF0000000000"
|
||||||
|
define i64 @orr_64_orr_8() nounwind {
|
||||||
|
; CHECK-LABEL: orr_64_orr_8:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov x0, #-6148914691236517206
|
||||||
|
; CHECK-NEXT: movk x0, #65450, lsl #32
|
||||||
|
; CHECK-NEXT: movk x0, #45055, lsl #48
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
ret i64 -5764607889538110806
|
||||||
|
}
|
||||||
|
|
|
@ -32,8 +32,8 @@ define void @test_simple(i32 %n, ...) {
|
||||||
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
|
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
|
||||||
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
|
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
|
||||||
|
|
||||||
; CHECK: mov [[GRVR:x[0-9]+]], #-545460846720
|
; CHECK: mov [[GRVR:x[0-9]+]], #-56
|
||||||
; CHECK: movk [[GRVR]], #65480
|
; CHECK: movk [[GRVR]], #65408, lsl #32
|
||||||
; CHECK: str [[GRVR]], [x[[VA_LIST]], #24]
|
; CHECK: str [[GRVR]], [x[[VA_LIST]], #24]
|
||||||
|
|
||||||
%addr = bitcast %va_list* @var to i8*
|
%addr = bitcast %va_list* @var to i8*
|
||||||
|
|
Loading…
Reference in New Issue