forked from OSchip/llvm-project
Re-land r349731 "[CodeGen][ExpandMemcmp] Add an option for allowing overlapping loads."
Update PPC IR following the GEP->bitcast to bitcast->GEP->bitcast change. llvm-svn: 349747
This commit is contained in:
parent
f43b510015
commit
36a3480385
|
@ -581,13 +581,17 @@ public:
|
|||
struct MemCmpExpansionOptions {
|
||||
// The list of available load sizes (in bytes), sorted in decreasing order.
|
||||
SmallVector<unsigned, 8> LoadSizes;
|
||||
// Set to true to allow overlapping loads. For example, 7-byte compares can
|
||||
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
|
||||
// requires all loads in LoadSizes to be doable in an unaligned way.
|
||||
bool AllowOverlappingLoads = false;
|
||||
};
|
||||
const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
|
||||
|
||||
/// Enable matching of interleaved access groups.
|
||||
bool enableInterleavedAccessVectorization() const;
|
||||
|
||||
/// Enable matching of interleaved access groups that contain predicated
|
||||
/// Enable matching of interleaved access groups that contain predicated
|
||||
/// accesses or gaps and therefore vectorized using masked
|
||||
/// vector loads/stores.
|
||||
bool enableMaskedInterleavedAccessVectorization() const;
|
||||
|
@ -772,7 +776,7 @@ public:
|
|||
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
|
||||
/// The index and subtype parameters are used by the subvector insertion and
|
||||
/// extraction shuffle kinds to show the insert/extract point and the type of
|
||||
/// the subvector being inserted/extracted.
|
||||
/// the subvector being inserted/extracted.
|
||||
/// NOTE: For subvector extractions Tp represents the source type.
|
||||
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
|
||||
Type *SubTp = nullptr) const;
|
||||
|
|
|
@ -66,23 +66,18 @@ class MemCmpExpansion {
|
|||
// Represents the decomposition in blocks of the expansion. For example,
|
||||
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
|
||||
// 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
|
||||
// TODO(courbet): Involve the target more in this computation. On X86, 7
|
||||
// bytes can be done more efficiently with two overlapping 4-byte loads than
|
||||
// covering the interval with [{4, 0}, {2, 4}, {1, 6}].
|
||||
struct LoadEntry {
|
||||
LoadEntry(unsigned LoadSize, uint64_t Offset)
|
||||
: LoadSize(LoadSize), Offset(Offset) {
|
||||
assert(Offset % LoadSize == 0 && "invalid load entry");
|
||||
}
|
||||
|
||||
uint64_t getGEPIndex() const { return Offset / LoadSize; }
|
||||
|
||||
// The size of the load for this block, in bytes.
|
||||
const unsigned LoadSize;
|
||||
// The offset of this load WRT the base pointer, in bytes.
|
||||
const uint64_t Offset;
|
||||
unsigned LoadSize;
|
||||
// The offset of this load from the base pointer, in bytes.
|
||||
uint64_t Offset;
|
||||
};
|
||||
SmallVector<LoadEntry, 8> LoadSequence;
|
||||
using LoadEntryVector = SmallVector<LoadEntry, 8>;
|
||||
LoadEntryVector LoadSequence;
|
||||
|
||||
void createLoadCmpBlocks();
|
||||
void createResultBlock();
|
||||
|
@ -92,13 +87,23 @@ class MemCmpExpansion {
|
|||
void emitLoadCompareBlock(unsigned BlockIndex);
|
||||
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
|
||||
unsigned &LoadIndex);
|
||||
void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
|
||||
void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
|
||||
void emitMemCmpResultBlock();
|
||||
Value *getMemCmpExpansionZeroCase();
|
||||
Value *getMemCmpEqZeroOneBlock();
|
||||
Value *getMemCmpOneBlock();
|
||||
Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
|
||||
uint64_t OffsetBytes);
|
||||
|
||||
public:
|
||||
static LoadEntryVector
|
||||
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
|
||||
unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
|
||||
static LoadEntryVector
|
||||
computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
|
||||
unsigned MaxNumLoads,
|
||||
unsigned &NumLoadsNonOneByte);
|
||||
|
||||
public:
|
||||
MemCmpExpansion(CallInst *CI, uint64_t Size,
|
||||
const TargetTransformInfo::MemCmpExpansionOptions &Options,
|
||||
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
|
||||
|
@ -110,6 +115,76 @@ class MemCmpExpansion {
|
|||
Value *getMemCmpExpansion();
|
||||
};
|
||||
|
||||
/// Decompose `Size` bytes into a sequence of non-overlapping loads, greedily
/// using each entry of `LoadSizes` in the order given (callers pass sizes
/// sorted in decreasing order).
///
/// \param Size         Number of bytes to cover.
/// \param LoadSizes    Candidate load sizes in bytes; every entry must be > 0.
/// \param MaxNumLoads  Upper bound on the number of loads in the result.
/// \param[out] NumLoadsNonOneByte  Reset to 0, then incremented once for each
///             load size greater than one byte that contributes at least one
///             load to the sequence.
/// \return The load sequence, or an empty vector if it would need more than
///         `MaxNumLoads` loads. If `LoadSizes` runs out before `Size` reaches
///         zero, the returned sequence covers only the bytes handled so far.
MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
    uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
    const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
  NumLoadsNonOneByte = 0;
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  while (Size && !LoadSizes.empty()) {
    const unsigned LoadSize = LoadSizes.front();
    // Guard the division and modulo below against a malformed size list.
    assert(LoadSize > 0 && "zero load size");
    const uint64_t NumLoadsForThisSize = Size / LoadSize;
    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
      // Do not expand if the total number of loads is larger than what the
      // target allows. Note that it's important that we exit before completing
      // the expansion to avoid using a ton of memory to store the expansion for
      // large sizes.
      return {};
    }
    if (NumLoadsForThisSize > 0) {
      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
        LoadSequence.push_back({LoadSize, Offset});
        Offset += LoadSize;
      }
      if (LoadSize > 1)
        ++NumLoadsNonOneByte;
      // Only the remainder is left for the smaller load sizes.
      Size %= LoadSize;
    }
    LoadSizes = LoadSizes.drop_front();
  }
  return LoadSequence;
}
|
||||
|
||||
/// Decompose `Size` bytes into loads of `MaxLoadSize` bytes each, where the
/// last load overlaps the previous one so that it ends exactly at `Size`.
/// For example, Size = 7 with MaxLoadSize = 4 yields [{4, 0}, {4, 3}].
///
/// \param Size         Number of bytes to cover; expected to be at least
///                     `MaxLoadSize` (asserted).
/// \param MaxLoadSize  Size in bytes of every load in the result.
/// \param MaxNumLoads  Upper bound on the number of loads in the result.
/// \param[out] NumLoadsNonOneByte  Set to 1 when a sequence is produced and
///             to 0 on every bail-out path.
/// \return The load sequence, or an empty vector when the greedy approach
///         already handles this case or when more than `MaxNumLoads` loads
///         would be needed.
MemCmpExpansion::LoadEntryVector
MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
                                                const unsigned MaxLoadSize,
                                                const unsigned MaxNumLoads,
                                                unsigned &NumLoadsNonOneByte) {
  // Always define the out-parameter, including on the bail-out paths below,
  // as computeGreedyLoadSequence does.
  NumLoadsNonOneByte = 0;

  // These are already handled by the greedy approach.
  if (Size < 2 || MaxLoadSize < 2)
    return {};

  // We try to do as many non-overlapping loads as possible starting from the
  // beginning.
  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
  assert(NumNonOverlappingLoads && "there must be at least one load");
  // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
  // an overlapping load.
  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
  // Bail if we do not need an overlapping load, this is already handled by
  // the greedy approach.
  if (Size == 0)
    return {};
  // Bail if the number of loads (non-overlapping + potential overlapping one)
  // is larger than the max allowed.
  if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
    return {};

  // Add non-overlapping loads.
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
    LoadSequence.push_back({MaxLoadSize, Offset});
    Offset += MaxLoadSize;
  }

  // Add the last overlapping load: step back so that it ends exactly at the
  // last byte to compare.
  assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
  LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
  NumLoadsNonOneByte = 1;
  return LoadSequence;
}
|
||||
|
||||
// Initialize the basic block structure required for expansion of memcmp call
|
||||
// with given maximum load size and memcmp size parameter.
|
||||
// This structure includes:
|
||||
|
@ -133,38 +208,31 @@ MemCmpExpansion::MemCmpExpansion(
|
|||
Builder(CI) {
|
||||
assert(Size > 0 && "zero blocks");
|
||||
// Scale the max size down if the target can load more bytes than we need.
|
||||
size_t LoadSizeIndex = 0;
|
||||
while (LoadSizeIndex < Options.LoadSizes.size() &&
|
||||
Options.LoadSizes[LoadSizeIndex] > Size) {
|
||||
++LoadSizeIndex;
|
||||
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
|
||||
while (!LoadSizes.empty() && LoadSizes.front() > Size) {
|
||||
LoadSizes = LoadSizes.drop_front();
|
||||
}
|
||||
this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
|
||||
assert(!LoadSizes.empty() && "cannot load Size bytes");
|
||||
MaxLoadSize = LoadSizes.front();
|
||||
// Compute the decomposition.
|
||||
uint64_t CurSize = Size;
|
||||
uint64_t Offset = 0;
|
||||
while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
|
||||
const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
|
||||
assert(LoadSize > 0 && "zero load size");
|
||||
const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
|
||||
if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
|
||||
// Do not expand if the total number of loads is larger than what the
|
||||
// target allows. Note that it's important that we exit before completing
|
||||
// the expansion to avoid using a ton of memory to store the expansion for
|
||||
// large sizes.
|
||||
LoadSequence.clear();
|
||||
return;
|
||||
unsigned GreedyNumLoadsNonOneByte = 0;
|
||||
LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
|
||||
GreedyNumLoadsNonOneByte);
|
||||
NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
|
||||
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
|
||||
// If we allow overlapping loads and the load sequence is not already optimal,
|
||||
// use overlapping loads.
|
||||
if (Options.AllowOverlappingLoads &&
|
||||
(LoadSequence.empty() || LoadSequence.size() > 2)) {
|
||||
unsigned OverlappingNumLoadsNonOneByte = 0;
|
||||
auto OverlappingLoads = computeOverlappingLoadSequence(
|
||||
Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
|
||||
if (!OverlappingLoads.empty() &&
|
||||
(LoadSequence.empty() ||
|
||||
OverlappingLoads.size() < LoadSequence.size())) {
|
||||
LoadSequence = OverlappingLoads;
|
||||
NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
|
||||
}
|
||||
if (NumLoadsForThisSize > 0) {
|
||||
for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
|
||||
LoadSequence.push_back({LoadSize, Offset});
|
||||
Offset += LoadSize;
|
||||
}
|
||||
if (LoadSize > 1) {
|
||||
++NumLoadsNonOneByte;
|
||||
}
|
||||
CurSize = CurSize % LoadSize;
|
||||
}
|
||||
++LoadSizeIndex;
|
||||
}
|
||||
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
|
||||
}
|
||||
|
@ -189,30 +257,32 @@ void MemCmpExpansion::createResultBlock() {
|
|||
EndBlock->getParent(), EndBlock);
|
||||
}
|
||||
|
||||
/// Return a pointer to an element of type `LoadSizeType` at offset
|
||||
/// `OffsetBytes`.
|
||||
Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
|
||||
Type *LoadSizeType,
|
||||
uint64_t OffsetBytes) {
|
||||
if (OffsetBytes > 0) {
|
||||
auto *ByteType = Type::getInt8Ty(CI->getContext());
|
||||
Source = Builder.CreateGEP(
|
||||
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
|
||||
ConstantInt::get(ByteType, OffsetBytes));
|
||||
}
|
||||
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
|
||||
}
|
||||
|
||||
// This function creates the IR instructions for loading and comparing 1 byte.
|
||||
// It loads 1 byte from each source of the memcmp parameters with the given
|
||||
// GEPIndex. It then subtracts the two loaded values and adds this result to the
|
||||
// final phi node for selecting the memcmp result.
|
||||
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
|
||||
unsigned GEPIndex) {
|
||||
Value *Source1 = CI->getArgOperand(0);
|
||||
Value *Source2 = CI->getArgOperand(1);
|
||||
|
||||
unsigned OffsetBytes) {
|
||||
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
|
||||
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
|
||||
// Cast source to LoadSizeType*.
|
||||
if (Source1->getType() != LoadSizeType)
|
||||
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
|
||||
if (Source2->getType() != LoadSizeType)
|
||||
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
|
||||
|
||||
// Get the base address using the GEPIndex.
|
||||
if (GEPIndex != 0) {
|
||||
Source1 = Builder.CreateGEP(LoadSizeType, Source1,
|
||||
ConstantInt::get(LoadSizeType, GEPIndex));
|
||||
Source2 = Builder.CreateGEP(LoadSizeType, Source2,
|
||||
ConstantInt::get(LoadSizeType, GEPIndex));
|
||||
}
|
||||
Value *Source1 =
|
||||
getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
|
||||
Value *Source2 =
|
||||
getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
|
||||
|
||||
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
|
||||
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
|
||||
|
@ -270,24 +340,10 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
|
|||
IntegerType *LoadSizeType =
|
||||
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
|
||||
|
||||
Value *Source1 = CI->getArgOperand(0);
|
||||
Value *Source2 = CI->getArgOperand(1);
|
||||
|
||||
// Cast source to LoadSizeType*.
|
||||
if (Source1->getType() != LoadSizeType)
|
||||
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
|
||||
if (Source2->getType() != LoadSizeType)
|
||||
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
|
||||
|
||||
// Get the base address using a GEP.
|
||||
if (CurLoadEntry.Offset != 0) {
|
||||
Source1 = Builder.CreateGEP(
|
||||
LoadSizeType, Source1,
|
||||
ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
|
||||
Source2 = Builder.CreateGEP(
|
||||
LoadSizeType, Source2,
|
||||
ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
|
||||
}
|
||||
Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
|
||||
CurLoadEntry.Offset);
|
||||
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
|
||||
CurLoadEntry.Offset);
|
||||
|
||||
// Get a constant or load a value for each source address.
|
||||
Value *LoadSrc1 = nullptr;
|
||||
|
@ -378,8 +434,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
|
|||
const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
|
||||
|
||||
if (CurLoadEntry.LoadSize == 1) {
|
||||
MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
|
||||
CurLoadEntry.getGEPIndex());
|
||||
MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -388,25 +443,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
|
|||
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
|
||||
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
|
||||
|
||||
Value *Source1 = CI->getArgOperand(0);
|
||||
Value *Source2 = CI->getArgOperand(1);
|
||||
|
||||
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
|
||||
// Cast source to LoadSizeType*.
|
||||
if (Source1->getType() != LoadSizeType)
|
||||
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
|
||||
if (Source2->getType() != LoadSizeType)
|
||||
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
|
||||
|
||||
// Get the base address using a GEP.
|
||||
if (CurLoadEntry.Offset != 0) {
|
||||
Source1 = Builder.CreateGEP(
|
||||
LoadSizeType, Source1,
|
||||
ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
|
||||
Source2 = Builder.CreateGEP(
|
||||
LoadSizeType, Source2,
|
||||
ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
|
||||
}
|
||||
Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
|
||||
CurLoadEntry.Offset);
|
||||
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
|
||||
CurLoadEntry.Offset);
|
||||
|
||||
// Load LoadSizeType from the base address.
|
||||
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
|
||||
|
@ -694,7 +736,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
|
|||
if (SizeVal == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TTI call to check if target would like to expand memcmp. Also, get the
|
||||
// available load sizes.
|
||||
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
|
||||
|
|
|
@ -1886,7 +1886,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
|
||||
};
|
||||
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
|
||||
{ ISD::BITREVERSE, MVT::i64, 14 }
|
||||
{ ISD::BITREVERSE, MVT::i64, 14 }
|
||||
};
|
||||
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
|
||||
{ ISD::BITREVERSE, MVT::i32, 14 },
|
||||
|
@ -2899,6 +2899,9 @@ X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
|
|||
Options.LoadSizes.push_back(4);
|
||||
Options.LoadSizes.push_back(2);
|
||||
Options.LoadSizes.push_back(1);
|
||||
// All GPR and vector loads can be unaligned. SIMD compare requires integer
|
||||
// vectors (SSE2/AVX2).
|
||||
Options.AllowOverlappingLoads = true;
|
||||
return Options;
|
||||
}();
|
||||
return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
|
||||
|
|
|
@ -17,10 +17,14 @@ entry:
|
|||
; CHECK-NEXT: br label %endblock
|
||||
|
||||
; CHECK-LABEL: loadbb1:{{.*}}
|
||||
; CHECK: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
|
||||
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
|
||||
; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
|
||||
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
|
||||
; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
|
||||
; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
|
||||
; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i8 8
|
||||
; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
|
||||
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i8 8
|
||||
; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
|
||||
; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
|
||||
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
|
||||
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
|
||||
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
|
||||
|
@ -38,10 +42,14 @@ entry:
|
|||
; CHECK-BE-NEXT: br label %endblock
|
||||
|
||||
; CHECK-BE-LABEL: loadbb1:{{.*}}
|
||||
; CHECK-BE: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
|
||||
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
|
||||
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
|
||||
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
|
||||
; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
|
||||
; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
|
||||
; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i8 8
|
||||
; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
|
||||
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i8 8
|
||||
; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
|
||||
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
|
||||
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
|
||||
; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
|
||||
|
||||
|
|
|
@ -639,17 +639,33 @@ define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
|
|||
}
|
||||
|
||||
define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
|
||||
; X86-LABEL: length24_eq:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $24
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll memcmp
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: retl
|
||||
; X86-NOSSE-LABEL: length24_eq:
|
||||
; X86-NOSSE: # %bb.0:
|
||||
; X86-NOSSE-NEXT: pushl $0
|
||||
; X86-NOSSE-NEXT: pushl $24
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: calll memcmp
|
||||
; X86-NOSSE-NEXT: addl $16, %esp
|
||||
; X86-NOSSE-NEXT: testl %eax, %eax
|
||||
; X86-NOSSE-NEXT: sete %al
|
||||
; X86-NOSSE-NEXT: retl
|
||||
;
|
||||
; X86-SSE2-LABEL: length24_eq:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
|
||||
; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
|
||||
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
|
||||
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
|
||||
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; X86-SSE2-NEXT: sete %al
|
||||
; X86-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSE2-LABEL: length24_eq:
|
||||
; X64-SSE2: # %bb.0:
|
||||
|
@ -683,17 +699,30 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
|
|||
}
|
||||
|
||||
define i1 @length24_eq_const(i8* %X) nounwind optsize {
|
||||
; X86-LABEL: length24_eq_const:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $24
|
||||
; X86-NEXT: pushl $.L.str
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll memcmp
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: retl
|
||||
; X86-NOSSE-LABEL: length24_eq_const:
|
||||
; X86-NOSSE: # %bb.0:
|
||||
; X86-NOSSE-NEXT: pushl $0
|
||||
; X86-NOSSE-NEXT: pushl $24
|
||||
; X86-NOSSE-NEXT: pushl $.L.str
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: calll memcmp
|
||||
; X86-NOSSE-NEXT: addl $16, %esp
|
||||
; X86-NOSSE-NEXT: testl %eax, %eax
|
||||
; X86-NOSSE-NEXT: setne %al
|
||||
; X86-NOSSE-NEXT: retl
|
||||
;
|
||||
; X86-SSE2-LABEL: length24_eq_const:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
|
||||
; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
|
||||
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
|
||||
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; X86-SSE2-NEXT: setne %al
|
||||
; X86-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSE2-LABEL: length24_eq_const:
|
||||
; X64-SSE2: # %bb.0:
|
||||
|
|
|
@ -362,24 +362,24 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
|
|||
define i1 @length7_eq(i8* %X, i8* %Y) nounwind {
|
||||
; X86-LABEL: length7_eq:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $7
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll memcmp
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl (%ecx), %edx
|
||||
; X86-NEXT: movl 3(%ecx), %ecx
|
||||
; X86-NEXT: xorl (%eax), %edx
|
||||
; X86-NEXT: xorl 3(%eax), %ecx
|
||||
; X86-NEXT: orl %edx, %ecx
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length7_eq:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl $7, %edx
|
||||
; X64-NEXT: callq memcmp
|
||||
; X64-NEXT: testl %eax, %eax
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: movl 3(%rdi), %ecx
|
||||
; X64-NEXT: xorl (%rsi), %eax
|
||||
; X64-NEXT: xorl 3(%rsi), %ecx
|
||||
; X64-NEXT: orl %eax, %ecx
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind
|
||||
%c = icmp ne i32 %m, 0
|
||||
|
@ -548,12 +548,12 @@ define i1 @length11_eq(i8* %X, i8* %Y) nounwind {
|
|||
;
|
||||
; X64-LABEL: length11_eq:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl $11, %edx
|
||||
; X64-NEXT: callq memcmp
|
||||
; X64-NEXT: testl %eax, %eax
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq 3(%rdi), %rcx
|
||||
; X64-NEXT: xorq (%rsi), %rax
|
||||
; X64-NEXT: xorq 3(%rsi), %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 11) nounwind
|
||||
%c = icmp eq i32 %m, 0
|
||||
|
@ -640,12 +640,12 @@ define i1 @length13_eq(i8* %X, i8* %Y) nounwind {
|
|||
;
|
||||
; X64-LABEL: length13_eq:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl $13, %edx
|
||||
; X64-NEXT: callq memcmp
|
||||
; X64-NEXT: testl %eax, %eax
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq 5(%rdi), %rcx
|
||||
; X64-NEXT: xorq (%rsi), %rax
|
||||
; X64-NEXT: xorq 5(%rsi), %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 13) nounwind
|
||||
%c = icmp eq i32 %m, 0
|
||||
|
@ -667,12 +667,12 @@ define i1 @length14_eq(i8* %X, i8* %Y) nounwind {
|
|||
;
|
||||
; X64-LABEL: length14_eq:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl $14, %edx
|
||||
; X64-NEXT: callq memcmp
|
||||
; X64-NEXT: testl %eax, %eax
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq 6(%rdi), %rcx
|
||||
; X64-NEXT: xorq (%rsi), %rax
|
||||
; X64-NEXT: xorq 6(%rsi), %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 14) nounwind
|
||||
%c = icmp eq i32 %m, 0
|
||||
|
@ -694,12 +694,12 @@ define i1 @length15_eq(i8* %X, i8* %Y) nounwind {
|
|||
;
|
||||
; X64-LABEL: length15_eq:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl $15, %edx
|
||||
; X64-NEXT: callq memcmp
|
||||
; X64-NEXT: testl %eax, %eax
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq 7(%rdi), %rcx
|
||||
; X64-NEXT: xorq (%rsi), %rax
|
||||
; X64-NEXT: xorq 7(%rsi), %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind
|
||||
%c = icmp eq i32 %m, 0
|
||||
|
@ -885,17 +885,45 @@ define i32 @length24(i8* %X, i8* %Y) nounwind {
|
|||
}
|
||||
|
||||
define i1 @length24_eq(i8* %x, i8* %y) nounwind {
|
||||
; X86-LABEL: length24_eq:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $24
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll memcmp
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: retl
|
||||
; X86-NOSSE-LABEL: length24_eq:
|
||||
; X86-NOSSE: # %bb.0:
|
||||
; X86-NOSSE-NEXT: pushl $0
|
||||
; X86-NOSSE-NEXT: pushl $24
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: calll memcmp
|
||||
; X86-NOSSE-NEXT: addl $16, %esp
|
||||
; X86-NOSSE-NEXT: testl %eax, %eax
|
||||
; X86-NOSSE-NEXT: sete %al
|
||||
; X86-NOSSE-NEXT: retl
|
||||
;
|
||||
; X86-SSE1-LABEL: length24_eq:
|
||||
; X86-SSE1: # %bb.0:
|
||||
; X86-SSE1-NEXT: pushl $0
|
||||
; X86-SSE1-NEXT: pushl $24
|
||||
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE1-NEXT: calll memcmp
|
||||
; X86-SSE1-NEXT: addl $16, %esp
|
||||
; X86-SSE1-NEXT: testl %eax, %eax
|
||||
; X86-SSE1-NEXT: sete %al
|
||||
; X86-SSE1-NEXT: retl
|
||||
;
|
||||
; X86-SSE2-LABEL: length24_eq:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
|
||||
; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1
|
||||
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
|
||||
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0
|
||||
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; X86-SSE2-NEXT: sete %al
|
||||
; X86-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSE2-LABEL: length24_eq:
|
||||
; X64-SSE2: # %bb.0:
|
||||
|
@ -929,17 +957,42 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
|
|||
}
|
||||
|
||||
define i1 @length24_eq_const(i8* %X) nounwind {
|
||||
; X86-LABEL: length24_eq_const:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $24
|
||||
; X86-NEXT: pushl $.L.str
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll memcmp
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: retl
|
||||
; X86-NOSSE-LABEL: length24_eq_const:
|
||||
; X86-NOSSE: # %bb.0:
|
||||
; X86-NOSSE-NEXT: pushl $0
|
||||
; X86-NOSSE-NEXT: pushl $24
|
||||
; X86-NOSSE-NEXT: pushl $.L.str
|
||||
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NOSSE-NEXT: calll memcmp
|
||||
; X86-NOSSE-NEXT: addl $16, %esp
|
||||
; X86-NOSSE-NEXT: testl %eax, %eax
|
||||
; X86-NOSSE-NEXT: setne %al
|
||||
; X86-NOSSE-NEXT: retl
|
||||
;
|
||||
; X86-SSE1-LABEL: length24_eq_const:
|
||||
; X86-SSE1: # %bb.0:
|
||||
; X86-SSE1-NEXT: pushl $0
|
||||
; X86-SSE1-NEXT: pushl $24
|
||||
; X86-SSE1-NEXT: pushl $.L.str
|
||||
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE1-NEXT: calll memcmp
|
||||
; X86-SSE1-NEXT: addl $16, %esp
|
||||
; X86-SSE1-NEXT: testl %eax, %eax
|
||||
; X86-SSE1-NEXT: setne %al
|
||||
; X86-SSE1-NEXT: retl
|
||||
;
|
||||
; X86-SSE2-LABEL: length24_eq_const:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
|
||||
; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1
|
||||
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
|
||||
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; X86-SSE2-NEXT: setne %al
|
||||
; X86-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSE2-LABEL: length24_eq_const:
|
||||
; X64-SSE2: # %bb.0:
|
||||
|
|
|
@ -130,11 +130,11 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
|
||||
; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
|
||||
; ALL: loadbb1:
|
||||
; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; ALL-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
|
||||
; ALL-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
|
||||
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
|
||||
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
|
||||
; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
|
||||
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
|
||||
; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
|
||||
; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
|
||||
; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
|
||||
|
@ -178,11 +178,11 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
|
||||
; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
|
||||
; X32: loadbb1:
|
||||
; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32*
|
||||
; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
|
||||
; X32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1
|
||||
; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
|
||||
; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
||||
; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
|
||||
; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
|
||||
; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
|
||||
; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
|
||||
; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
|
||||
|
@ -272,11 +272,11 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
|
||||
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
|
||||
; X64: loadbb1:
|
||||
; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4
|
||||
; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 4
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
|
||||
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
|
||||
; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
|
||||
; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
|
||||
; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
|
||||
|
@ -324,11 +324,11 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
|
||||
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
|
||||
; X64: loadbb1:
|
||||
; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
|
||||
; X64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
|
||||
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
|
||||
; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
|
||||
; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
|
||||
; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
|
||||
|
@ -394,11 +394,11 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
|
||||
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
|
||||
; X64: loadbb1:
|
||||
; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i64*
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i64*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1
|
||||
; X64-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[TMP11]], i64 1
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
|
||||
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*
|
||||
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*
|
||||
; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]]
|
||||
; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
|
||||
; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
|
||||
; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
|
||||
|
@ -597,11 +597,11 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
|
||||
; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X32-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
|
||||
; X32-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
|
||||
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
|
||||
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
|
||||
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
|
||||
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
|
||||
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
|
||||
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
|
||||
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
|
||||
|
@ -625,11 +625,11 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
|
@ -645,11 +645,11 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
|
||||
|
@ -668,11 +668,71 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq7(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq7(
|
||||
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
|
||||
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
|
||||
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
|
||||
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
|
||||
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
|
||||
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
|
||||
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
|
||||
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
|
||||
; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_1LD-LABEL: @cmp_eq7(
|
||||
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
|
||||
; X64_1LD: res_block:
|
||||
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
|
||||
; X64_1LD: loadbb:
|
||||
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
; X64_1LD: endblock:
|
||||
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
|
||||
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
|
||||
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_1LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_2LD-LABEL: @cmp_eq7(
|
||||
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
|
||||
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
|
||||
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_2LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
@ -687,11 +747,11 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
|
||||
; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
|
||||
; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
|
||||
; X32-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1
|
||||
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
|
||||
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
|
||||
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
|
||||
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
|
||||
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
|
||||
|
@ -794,11 +854,11 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
|
@ -814,11 +874,11 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
|
||||
|
@ -837,11 +897,57 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq11(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq11(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_1LD-LABEL: @cmp_eq11(
|
||||
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
|
||||
; X64_1LD: res_block:
|
||||
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
|
||||
; X64_1LD: loadbb:
|
||||
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
; X64_1LD: endblock:
|
||||
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
|
||||
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
|
||||
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_1LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_2LD-LABEL: @cmp_eq11(
|
||||
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
|
||||
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
|
||||
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_2LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
@ -868,11 +974,11 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
|
@ -888,11 +994,11 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
|
||||
|
@ -911,11 +1017,57 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq13(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq13(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_1LD-LABEL: @cmp_eq13(
|
||||
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
|
||||
; X64_1LD: res_block:
|
||||
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
|
||||
; X64_1LD: loadbb:
|
||||
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
; X64_1LD: endblock:
|
||||
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
|
||||
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
|
||||
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_1LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_2LD-LABEL: @cmp_eq13(
|
||||
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
|
||||
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
|
||||
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_2LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
@ -924,11 +1076,57 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq14(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq14(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_1LD-LABEL: @cmp_eq14(
|
||||
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
|
||||
; X64_1LD: res_block:
|
||||
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
|
||||
; X64_1LD: loadbb:
|
||||
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
; X64_1LD: endblock:
|
||||
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
|
||||
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
|
||||
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_1LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_2LD-LABEL: @cmp_eq14(
|
||||
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
|
||||
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
|
||||
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_2LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
@ -937,11 +1135,57 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq15(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq15(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_1LD-LABEL: @cmp_eq15(
|
||||
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
|
||||
; X64_1LD: res_block:
|
||||
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
|
||||
; X64_1LD: loadbb:
|
||||
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
|
||||
; X64_1LD: loadbb1:
|
||||
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
|
||||
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
|
||||
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
|
||||
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
|
||||
; X64_1LD: endblock:
|
||||
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
|
||||
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
|
||||
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_1LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64_2LD-LABEL: @cmp_eq15(
|
||||
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
|
||||
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
|
||||
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
|
||||
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
|
||||
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
|
||||
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
|
||||
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
|
||||
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
|
||||
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
|
||||
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
|
||||
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
|
||||
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64_2LD-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
|
Loading…
Reference in New Issue