[CodeGen][ExpandMemcmp] Allow memcmp to expand to vector loads (2).

- Targets that want to support memcmp expansion now return the list of
  supported load sizes.
- Expansion codegen does not assume that all power-of-two load sizes smaller
  than the max load size are valid. For example, this is not the case for
  x86 (32-bit) + SSE2.

Fixes PR34887.

llvm-svn: 316905
parent bef1c56724
commit b2c3eb8cf1
@@ -554,8 +554,13 @@ public:
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;

-  /// \brief Enable inline expansion of memcmp
-  bool enableMemCmpExpansion(unsigned &MaxLoadSize) const;
+  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
+  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+  struct MemCmpExpansionOptions {
+    // The list of available load sizes (in bytes), sorted in decreasing order.
+    SmallVector<unsigned, 8> LoadSizes;
+  };
+  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;

   /// \brief Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;

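A target opts in by returning a pointer to a statically initialized options struct instead of the old bool-plus-out-parameter. A minimal sketch for a hypothetical MyTTIImpl (the class name is illustrative, not part of this commit), following the same pattern as the PPC and X86 implementations further down:

// Hypothetical target implementation of the new hook. The list must be
// sorted in decreasing order and contain only sizes this target can load
// natively; it need not include every power of two below the maximum.
const MyTTIImpl::TTI::MemCmpExpansionOptions *
MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  static const auto Options = []() {
    TTI::MemCmpExpansionOptions Options;
    Options.LoadSizes.push_back(8);
    Options.LoadSizes.push_back(4);
    Options.LoadSizes.push_back(2);
    Options.LoadSizes.push_back(1);
    return Options;
  }();
  return &Options; // Returning nullptr keeps memcmp calls unexpanded.
}
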
@@ -993,7 +998,8 @@ public:
                                              unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual bool enableMemCmpExpansion(unsigned &MaxLoadSize) = 0;
+  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,

@@ -1246,8 +1252,9 @@ public:
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  bool enableMemCmpExpansion(unsigned &MaxLoadSize) override {
-    return Impl.enableMemCmpExpansion(MaxLoadSize);
+  const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const override {
+    return Impl.enableMemCmpExpansion(IsZeroCmp);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();

@@ -294,7 +294,10 @@ public:

   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

-  bool enableMemCmpExpansion(unsigned &MaxLoadSize) { return false; }
+  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const {
+    return nullptr;
+  }

   bool enableInterleavedAccessVectorization() { return false; }

@@ -250,8 +250,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }

-bool TargetTransformInfo::enableMemCmpExpansion(unsigned &MaxLoadSize) const {
-  return TTIImpl->enableMemCmpExpansion(MaxLoadSize);
+const TargetTransformInfo::MemCmpExpansionOptions *
+TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
+  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
 }

 bool TargetTransformInfo::enableInterleavedAccessVectorization() const {

@@ -1758,9 +1758,10 @@ class MemCmpExpansion {
   Value *getMemCmpOneBlock();

 public:
-  MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
-                  unsigned MaxNumLoads, unsigned NumLoadsPerBlock,
-                  const DataLayout &DL);
+  MemCmpExpansion(CallInst *CI, uint64_t Size,
+                  const TargetTransformInfo::MemCmpExpansionOptions &Options,
+                  unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+                  unsigned NumLoadsPerBlock, const DataLayout &DL);

   unsigned getNumBlocks();
   uint64_t getNumLoads() const { return LoadSequence.size(); }

@@ -1778,29 +1779,32 @@ class MemCmpExpansion {
 // return from.
 // 3. ResultBlock, block to branch to for early exit when a
 //    LoadCmpBlock finds a difference.
-MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
-                                 const unsigned MaxLoadSize,
-                                 const unsigned MaxNumLoads,
-                                 const unsigned LoadsPerBlock,
-                                 const DataLayout &TheDataLayout)
+MemCmpExpansion::MemCmpExpansion(
+    CallInst *const CI, uint64_t Size,
+    const TargetTransformInfo::MemCmpExpansionOptions &Options,
+    const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+    const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
     : CI(CI),
       Size(Size),
-      MaxLoadSize(MaxLoadSize),
+      MaxLoadSize(0),
       NumLoadsNonOneByte(0),
-      NumLoadsPerBlock(LoadsPerBlock),
-      IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)),
+      NumLoadsPerBlock(NumLoadsPerBlock),
+      IsUsedForZeroCmp(IsUsedForZeroCmp),
       DL(TheDataLayout),
       Builder(CI) {
   assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
-  while (this->MaxLoadSize > Size) {
-    this->MaxLoadSize /= 2;
+  size_t LoadSizeIndex = 0;
+  while (LoadSizeIndex < Options.LoadSizes.size() &&
+         Options.LoadSizes[LoadSizeIndex] > Size) {
+    ++LoadSizeIndex;
   }
+  this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
   // Compute the decomposition.
-  unsigned LoadSize = this->MaxLoadSize;
   uint64_t CurSize = Size;
   uint64_t Offset = 0;
-  while (CurSize) {
+  while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
+    const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
     assert(LoadSize > 0 && "zero load size");
     const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
     if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {

@@ -1821,11 +1825,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
       }
       CurSize = CurSize % LoadSize;
     }
-    // FIXME: This can result in a non-native load size (e.g. X86-32+SSE can
-    // load 16 and 4 but not 8), which throws the load count off (e.g. in the
-    // aforementioned case, 16 bytes will count for 2 loads but will generate
-    // 4).
-    LoadSize /= 2;
+    ++LoadSizeIndex;
   }
   assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
 }

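The rewritten loop above is the heart of the fix: instead of halving a power-of-two load size (which, per the removed FIXME, could invent sizes the target cannot load), it walks the target-provided list. A standalone sketch of the same greedy decomposition, with the PR34887 case worked through in the comments (this is illustrative code, not LLVM's):

#include <cstdint>
#include <vector>

// Cover Size bytes using only the load sizes the target reports, largest
// first. E.g. Size = 24 with LoadSizes = {16, 4, 2, 1} (x86-32 + SSE2)
// yields {16, 4, 4}; the old code would have assumed 24 = 16 + 8 and
// emitted an 8-byte load that x86-32 cannot do natively (PR34887).
std::vector<unsigned> decompose(uint64_t Size,
                                const std::vector<unsigned> &LoadSizes) {
  std::vector<unsigned> Seq;
  size_t I = 0;
  // Skip load sizes bigger than what remains to be compared.
  while (I < LoadSizes.size() && LoadSizes[I] > Size)
    ++I;
  uint64_t CurSize = Size;
  while (CurSize && I < LoadSizes.size()) {
    const unsigned LoadSize = LoadSizes[I];
    for (uint64_t N = CurSize / LoadSize; N != 0; --N)
      Seq.push_back(LoadSize);
    CurSize %= LoadSize;
    ++I; // Move on to the next (smaller) available size; never divide by 2.
  }
  // If LoadSizes ends with 1, CurSize is now 0. The real pass additionally
  // gives up once the sequence exceeds the target's MaxNumLoads budget.
  return Seq;
}
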
@@ -2362,15 +2362,16 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   }

   // TTI call to check if target would like to expand memcmp. Also, get the
-  // max LoadSize.
-  unsigned MaxLoadSize;
-  if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false;
+  // available load sizes.
+  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+  if (!Options) return false;

   const unsigned MaxNumLoads =
       TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());

-  MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads,
-                            MemCmpNumLoadsPerBlock, *DL);
+  MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
+                            IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);

   // Don't expand if this will require more loads than desired by the target.
   if (Expansion.getNumLoads() == 0) {

@@ -226,9 +226,17 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
   return LoopHasReductions;
 }

-bool PPCTTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
-  MaxLoadSize = 8;
-  return true;
+const PPCTTIImpl::TTI::MemCmpExpansionOptions *
+PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+  static const auto Options = []() {
+    TTI::MemCmpExpansionOptions Options;
+    Options.LoadSizes.push_back(8);
+    Options.LoadSizes.push_back(4);
+    Options.LoadSizes.push_back(2);
+    Options.LoadSizes.push_back(1);
+    return Options;
+  }();
+  return &Options;
 }

 bool PPCTTIImpl::enableInterleavedAccessVectorization() {

@@ -63,7 +63,8 @@ public:
   /// @{

   bool enableAggressiveInterleaving(bool LoopHasReductions);
-  bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const;
   bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector) const;

@@ -2536,10 +2536,35 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
   return (CallerBits & CalleeBits) == CalleeBits;
 }

-bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
-  // TODO: We can increase these based on available vector ops.
-  MaxLoadSize = ST->is64Bit() ? 8 : 4;
-  return true;
+const X86TTIImpl::TTI::MemCmpExpansionOptions *
+X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+  // Only enable vector loads for equality comparison.
+  // Right now the vector version is not as fast, see #33329.
+  static const auto ThreeWayOptions = [this]() {
+    TTI::MemCmpExpansionOptions Options;
+    if (ST->is64Bit()) {
+      Options.LoadSizes.push_back(8);
+    }
+    Options.LoadSizes.push_back(4);
+    Options.LoadSizes.push_back(2);
+    Options.LoadSizes.push_back(1);
+    return Options;
+  }();
+  static const auto EqZeroOptions = [this]() {
+    TTI::MemCmpExpansionOptions Options;
+    // TODO: enable AVX512 when the DAG is ready.
+    // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
+    if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
+    if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
+    if (ST->is64Bit()) {
+      Options.LoadSizes.push_back(8);
+    }
+    Options.LoadSizes.push_back(4);
+    Options.LoadSizes.push_back(2);
+    Options.LoadSizes.push_back(1);
+    return Options;
+  }();
+  return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
 }

 bool X86TTIImpl::enableInterleavedAccessVectorization() {

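The 16-byte entry that the hasSSE2() check adds to EqZeroOptions is what lets memcmp(x, y, 16) == 0 become a pair of vector instructions instead of scalar loads, as the test updates below expect. A rough sketch of the comparison pattern in SSE2 intrinsics (eq16 is a hypothetical helper written for illustration, not LLVM code):

#include <emmintrin.h> // SSE2 intrinsics

// What the expanded memcmp(x, y, 16) == 0 boils down to: one unaligned
// 16-byte load per side, a byte-wise compare, and a mask test.
bool eq16(const void *x, const void *y) {
  const __m128i A = _mm_loadu_si128(static_cast<const __m128i *>(x)); // movdqu (%rdi), %xmm0
  const __m128i B = _mm_loadu_si128(static_cast<const __m128i *>(y)); // movdqu (%rsi), %xmm1
  const __m128i Eq = _mm_cmpeq_epi8(A, B);                            // pcmpeqb %xmm0, %xmm1
  return _mm_movemask_epi8(Eq) == 0xFFFF;                            // pmovmskb + cmpl $65535
}
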
@@ -127,7 +127,8 @@ public:
   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
-  bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const;
   bool enableInterleavedAccessVectorization();
 private:
   int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,

@@ -625,8 +625,7 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,

   // We only try merging comparisons if the target wants to expand memcmp later.
   // The rationale is to avoid turning small chains into memcmp calls.
-  unsigned MaxLoadSize;
-  if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all();
+  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();

   bool MadeChange = false;

@@ -598,22 +598,24 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind optsize {
 ; X86-SSE2-NEXT: setne %al
 ; X86-SSE2-NEXT: retl
 ;
-; X64-LABEL: length16_eq:
-; X64: # BB#0: # %loadbb
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: jne .LBB17_1
-; X64-NEXT: # BB#2: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq 8(%rsi), %rcx
-; X64-NEXT: je .LBB17_3
-; X64-NEXT: .LBB17_1: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB17_3: # %endblock
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length16_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length16_eq:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
 %cmp = icmp ne i32 %call, 0
 ret i1 %cmp

@@ -642,22 +644,23 @@ define i1 @length16_eq_const(i8* %X) nounwind optsize {
 ; X86-SSE2-NEXT: sete %al
 ; X86-SSE2-NEXT: retl
 ;
-; X64-LABEL: length16_eq_const:
-; X64: # BB#0: # %loadbb
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: jne .LBB18_1
-; X64-NEXT: # BB#2: # %loadbb1
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
-; X64-NEXT: cmpq %rcx, 8(%rdi)
-; X64-NEXT: je .LBB18_3
-; X64-NEXT: .LBB18_1: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB18_3: # %endblock
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length16_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length16_eq_const:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
 %c = icmp eq i32 %m, 0
 ret i1 %c

@@ -697,15 +700,44 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length24_eq:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length24_eq:
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB20_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movq 16(%rdi), %rcx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
+; X64-SSE2-NEXT: je .LBB20_3
+; X64-SSE2-NEXT: .LBB20_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB20_3: # %endblock
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length24_eq:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: jne .LBB20_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: movq 16(%rdi), %rcx
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: cmpq 16(%rsi), %rcx
+; X64-AVX2-NEXT: je .LBB20_3
+; X64-AVX2-NEXT: .LBB20_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB20_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
 %cmp = icmp eq i32 %call, 0
 ret i1 %cmp

@@ -724,16 +756,43 @@ define i1 @length24_eq_const(i8* %X) nounwind optsize {
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length24_eq_const:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length24_eq_const:
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB21_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
+; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
+; X64-SSE2-NEXT: je .LBB21_3
+; X64-SSE2-NEXT: .LBB21_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB21_3: # %endblock
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length24_eq_const:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: jne .LBB21_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
+; X64-AVX2-NEXT: cmpq %rcx, 16(%rdi)
+; X64-AVX2-NEXT: je .LBB21_3
+; X64-AVX2-NEXT: .LBB21_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB21_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
 %c = icmp ne i32 %m, 0
 ret i1 %c

@@ -761,26 +820,65 @@ define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
 ; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

 define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
-; X86-LABEL: length32_eq:
-; X86: # BB#0:
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
 ;
+; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2: # BB#0: # %loadbb
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu (%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
+; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
+; X86-SSE2-NEXT: jne .LBB23_1
+; X86-SSE2-NEXT: # BB#2: # %loadbb1
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: je .LBB23_3
+; X86-SSE2-NEXT: .LBB23_1: # %res_block
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: incl %eax
+; X86-SSE2-NEXT: .LBB23_3: # %endblock
+; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: sete %al
+; X86-SSE2-NEXT: retl
+;
 ; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # BB#0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $32, %edx
-; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB23_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-SSE2-NEXT: je .LBB23_3
+; X64-SSE2-NEXT: .LBB23_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB23_3: # %endblock
 ; X64-SSE2-NEXT: testl %eax, %eax
 ; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
 ; X64-SSE2-NEXT: retq
 ;
 ; X64-AVX2-LABEL: length32_eq:

@@ -798,27 +896,60 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
 }

 define i1 @length32_eq_const(i8* %X) nounwind optsize {
-; X86-LABEL: length32_eq_const:
-; X86: # BB#0:
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
 ;
+; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2: # BB#0: # %loadbb
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: jne .LBB24_1
+; X86-SSE2-NEXT: # BB#2: # %loadbb1
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: je .LBB24_3
+; X86-SSE2-NEXT: .LBB24_1: # %res_block
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: incl %eax
+; X86-SSE2-NEXT: .LBB24_3: # %endblock
+; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: setne %al
+; X86-SSE2-NEXT: retl
+;
 ; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # BB#0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: movl $32, %edx
-; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB24_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-SSE2-NEXT: je .LBB24_3
+; X64-SSE2-NEXT: .LBB24_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB24_3: # %endblock
 ; X64-SSE2-NEXT: testl %eax, %eax
 ; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
 ; X64-SSE2-NEXT: retq
 ;
 ; X64-AVX2-LABEL: length32_eq_const:

@@ -867,15 +998,37 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length64_eq:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length64_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: jne .LBB26_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: cmpl $-1, %ecx
+; X64-AVX2-NEXT: je .LBB26_3
+; X64-AVX2-NEXT: .LBB26_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB26_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
 %cmp = icmp ne i32 %call, 0
 ret i1 %cmp

@@ -894,16 +1047,38 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length64_eq_const:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length64_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $.L.str, %esi
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq_const:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: jne .LBB27_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: cmpl $-1, %ecx
+; X64-AVX2-NEXT: je .LBB27_3
+; X64-AVX2-NEXT: .LBB27_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB27_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
 %c = icmp eq i32 %m, 0
 ret i1 %c

@@ -639,22 +639,24 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
 ; X86-SSE2-NEXT: setne %al
 ; X86-SSE2-NEXT: retl
 ;
-; X64-LABEL: length16_eq:
-; X64: # BB#0: # %loadbb
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: jne .LBB19_1
-; X64-NEXT: # BB#2: # %loadbb1
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq 8(%rsi), %rcx
-; X64-NEXT: je .LBB19_3
-; X64-NEXT: .LBB19_1: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB19_3: # %endblock
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length16_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length16_eq:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
 %cmp = icmp ne i32 %call, 0
 ret i1 %cmp

@@ -695,22 +697,23 @@ define i1 @length16_eq_const(i8* %X) nounwind {
 ; X86-SSE2-NEXT: sete %al
 ; X86-SSE2-NEXT: retl
 ;
-; X64-LABEL: length16_eq_const:
-; X64: # BB#0: # %loadbb
-; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
-; X64-NEXT: cmpq %rax, (%rdi)
-; X64-NEXT: jne .LBB20_1
-; X64-NEXT: # BB#2: # %loadbb1
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
-; X64-NEXT: cmpq %rcx, 8(%rdi)
-; X64-NEXT: je .LBB20_3
-; X64-NEXT: .LBB20_1: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB20_3: # %endblock
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length16_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length16_eq_const:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
 %c = icmp eq i32 %m, 0
 ret i1 %c

@@ -750,15 +753,44 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length24_eq:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length24_eq:
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB22_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movq 16(%rdi), %rcx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
+; X64-SSE2-NEXT: je .LBB22_3
+; X64-SSE2-NEXT: .LBB22_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB22_3: # %endblock
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length24_eq:
+; X64-AVX: # BB#0: # %loadbb
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX-NEXT: jne .LBB22_1
+; X64-AVX-NEXT: # BB#2: # %loadbb1
+; X64-AVX-NEXT: movq 16(%rdi), %rcx
+; X64-AVX-NEXT: xorl %eax, %eax
+; X64-AVX-NEXT: cmpq 16(%rsi), %rcx
+; X64-AVX-NEXT: je .LBB22_3
+; X64-AVX-NEXT: .LBB22_1: # %res_block
+; X64-AVX-NEXT: movl $1, %eax
+; X64-AVX-NEXT: .LBB22_3: # %endblock
+; X64-AVX-NEXT: testl %eax, %eax
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
 %cmp = icmp eq i32 %call, 0
 ret i1 %cmp

@@ -777,16 +809,43 @@ define i1 @length24_eq_const(i8* %X) nounwind {
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length24_eq_const:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $24, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length24_eq_const:
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB23_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
+; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
+; X64-SSE2-NEXT: je .LBB23_3
+; X64-SSE2-NEXT: .LBB23_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB23_3: # %endblock
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX-LABEL: length24_eq_const:
+; X64-AVX: # BB#0: # %loadbb
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX-NEXT: jne .LBB23_1
+; X64-AVX-NEXT: # BB#2: # %loadbb1
+; X64-AVX-NEXT: xorl %eax, %eax
+; X64-AVX-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
+; X64-AVX-NEXT: cmpq %rcx, 16(%rdi)
+; X64-AVX-NEXT: je .LBB23_3
+; X64-AVX-NEXT: .LBB23_1: # %res_block
+; X64-AVX-NEXT: movl $1, %eax
+; X64-AVX-NEXT: .LBB23_3: # %endblock
+; X64-AVX-NEXT: testl %eax, %eax
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
 %c = icmp ne i32 %m, 0
 ret i1 %c

@@ -814,41 +873,96 @@ define i32 @length32(i8* %X, i8* %Y) nounwind {
 ; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

 define i1 @length32_eq(i8* %x, i8* %y) nounwind {
-; X86-LABEL: length32_eq:
-; X86: # BB#0:
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
 ;
+; X86-SSE1-LABEL: length32_eq:
+; X86-SSE1: # BB#0:
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: calll memcmp
+; X86-SSE1-NEXT: addl $16, %esp
+; X86-SSE1-NEXT: testl %eax, %eax
+; X86-SSE1-NEXT: sete %al
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2: # BB#0: # %loadbb
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu (%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
+; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
+; X86-SSE2-NEXT: jne .LBB25_1
+; X86-SSE2-NEXT: # BB#2: # %loadbb1
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: je .LBB25_3
+; X86-SSE2-NEXT: .LBB25_1: # %res_block
+; X86-SSE2-NEXT: movl $1, %eax
+; X86-SSE2-NEXT: .LBB25_3: # %endblock
+; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: sete %al
+; X86-SSE2-NEXT: retl
+;
 ; X64-SSE2-LABEL: length32_eq:
-; X64-SSE2: # BB#0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $32, %edx
-; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB25_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-SSE2-NEXT: je .LBB25_3
+; X64-SSE2-NEXT: .LBB25_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB25_3: # %endblock
 ; X64-SSE2-NEXT: testl %eax, %eax
 ; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
 ; X64-SSE2-NEXT: retq
 ;
 ; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: movq 16(%rdi), %rax
-; X64-AVX1-NEXT: movq (%rdi), %rcx
-; X64-AVX1-NEXT: movq 8(%rdi), %rdx
-; X64-AVX1-NEXT: movq 24(%rdi), %rdi
-; X64-AVX1-NEXT: xorq 24(%rsi), %rdi
-; X64-AVX1-NEXT: xorq 8(%rsi), %rdx
-; X64-AVX1-NEXT: orq %rdi, %rdx
-; X64-AVX1-NEXT: xorq 16(%rsi), %rax
-; X64-AVX1-NEXT: xorq (%rsi), %rcx
-; X64-AVX1-NEXT: orq %rax, %rcx
-; X64-AVX1-NEXT: orq %rdx, %rcx
+; X64-AVX1: # BB#0: # %loadbb
+; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX1-NEXT: jne .LBB25_1
+; X64-AVX1-NEXT: # BB#2: # %loadbb1
+; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
+; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; X64-AVX1-NEXT: xorl %eax, %eax
+; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-AVX1-NEXT: je .LBB25_3
+; X64-AVX1-NEXT: .LBB25_1: # %res_block
+; X64-AVX1-NEXT: movl $1, %eax
+; X64-AVX1-NEXT: .LBB25_3: # %endblock
+; X64-AVX1-NEXT: testl %eax, %eax
 ; X64-AVX1-NEXT: sete %al
 ; X64-AVX1-NEXT: retq
 ;

@@ -867,42 +981,91 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
 }

 define i1 @length32_eq_const(i8* %X) nounwind {
-; X86-LABEL: length32_eq_const:
-; X86: # BB#0:
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $32
-; X86-NEXT: pushl $.L.str
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll memcmp
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
 ;
+; X86-SSE1-LABEL: length32_eq_const:
+; X86-SSE1: # BB#0:
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl $.L.str
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: calll memcmp
+; X86-SSE1-NEXT: addl $16, %esp
+; X86-SSE1-NEXT: testl %eax, %eax
+; X86-SSE1-NEXT: setne %al
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2: # BB#0: # %loadbb
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: jne .LBB26_1
+; X86-SSE2-NEXT: # BB#2: # %loadbb1
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X86-SSE2-NEXT: je .LBB26_3
+; X86-SSE2-NEXT: .LBB26_1: # %res_block
+; X86-SSE2-NEXT: movl $1, %eax
+; X86-SSE2-NEXT: .LBB26_3: # %endblock
+; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: setne %al
+; X86-SSE2-NEXT: retl
+;
 ; X64-SSE2-LABEL: length32_eq_const:
-; X64-SSE2: # BB#0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: movl $32, %edx
-; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2: # BB#0: # %loadbb
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: jne .LBB26_1
+; X64-SSE2-NEXT: # BB#2: # %loadbb1
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
+; X64-SSE2-NEXT: xorl %eax, %eax
+; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-SSE2-NEXT: je .LBB26_3
+; X64-SSE2-NEXT: .LBB26_1: # %res_block
+; X64-SSE2-NEXT: movl $1, %eax
+; X64-SSE2-NEXT: .LBB26_3: # %endblock
 ; X64-SSE2-NEXT: testl %eax, %eax
 ; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
 ; X64-SSE2-NEXT: retq
 ;
 ; X64-AVX1-LABEL: length32_eq_const:
-; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: movabsq $3544395820347831604, %rax # imm = 0x3130393837363534
-; X64-AVX1-NEXT: xorq 24(%rdi), %rax
-; X64-AVX1-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
-; X64-AVX1-NEXT: xorq 8(%rdi), %rcx
-; X64-AVX1-NEXT: orq %rax, %rcx
-; X64-AVX1-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
-; X64-AVX1-NEXT: xorq 16(%rdi), %rax
-; X64-AVX1-NEXT: movabsq $3978425819141910832, %rdx # imm = 0x3736353433323130
-; X64-AVX1-NEXT: xorq (%rdi), %rdx
-; X64-AVX1-NEXT: orq %rax, %rdx
-; X64-AVX1-NEXT: orq %rcx, %rdx
+; X64-AVX1: # BB#0: # %loadbb
+; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX1-NEXT: jne .LBB26_1
+; X64-AVX1-NEXT: # BB#2: # %loadbb1
+; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
+; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; X64-AVX1-NEXT: xorl %eax, %eax
+; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; X64-AVX1-NEXT: je .LBB26_3
+; X64-AVX1-NEXT: .LBB26_1: # %res_block
+; X64-AVX1-NEXT: movl $1, %eax
+; X64-AVX1-NEXT: .LBB26_3: # %endblock
+; X64-AVX1-NEXT: testl %eax, %eax
 ; X64-AVX1-NEXT: setne %al
 ; X64-AVX1-NEXT: retq
 ;

@@ -952,15 +1115,47 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length64_eq:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: setne %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length64_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX1-LABEL: length64_eq:
+; X64-AVX1: # BB#0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $64, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: jne .LBB28_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: cmpl $-1, %ecx
+; X64-AVX2-NEXT: je .LBB28_3
+; X64-AVX2-NEXT: .LBB28_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB28_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
 %cmp = icmp ne i32 %call, 0
 ret i1 %cmp

@@ -979,16 +1174,49 @@ define i1 @length64_eq_const(i8* %X) nounwind {
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
-; X64-LABEL: length64_eq_const:
-; X64: # BB#0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: movl $.L.str, %esi
-; X64-NEXT: movl $64, %edx
-; X64-NEXT: callq memcmp
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: sete %al
-; X64-NEXT: popq %rcx
-; X64-NEXT: retq
+; X64-SSE2-LABEL: length64_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $.L.str, %esi
+; X64-SSE2-NEXT: movl $64, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX1-LABEL: length64_eq_const:
+; X64-AVX1: # BB#0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $64, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq_const:
+; X64-AVX2: # BB#0: # %loadbb
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: jne .LBB29_1
+; X64-AVX2-NEXT: # BB#2: # %loadbb1
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: cmpl $-1, %ecx
+; X64-AVX2-NEXT: je .LBB29_3
+; X64-AVX2-NEXT: .LBB29_1: # %res_block
+; X64-AVX2-NEXT: movl $1, %eax
+; X64-AVX2-NEXT: .LBB29_3: # %endblock
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
 %c = icmp eq i32 %m, 0
 ret i1 %c

@@ -753,27 +753,13 @@ define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-NEXT: ret i32 [[CONV]]
 ;
 ; X64-LABEL: @cmp_eq16(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
-; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; X64: res_block:
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i64*
-; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i64*
-; X64-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 1
-; X64-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[TMP6]], i64 1
-; X64-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP7]]
-; X64-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP9]], [[TMP10]]
-; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128*
+; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
+; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]]
+; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64-NEXT: ret i32 [[CONV]]
 ;