forked from OSchip/llvm-project
[CGP] add specialization for memcmp expansion with only one basic block
llvm-svn: 306485
This commit is contained in:
parent
c5fa6358ba
commit
4b23fa0abf
|
@ -1679,6 +1679,7 @@ class MemCmpExpansion {
|
|||
void emitMemCmpResultBlock();
|
||||
Value *getMemCmpExpansionZeroCase(unsigned Size);
|
||||
Value *getMemCmpEqZeroOneBlock(unsigned Size);
|
||||
Value *getMemCmpOneBlock(unsigned Size);
|
||||
unsigned getLoadSize(unsigned Size);
|
||||
unsigned getNumLoads(unsigned Size);
|
||||
|
||||
|
@ -1711,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
|
|||
// we choose to handle this case too to avoid fragmented lowering.
|
||||
IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
|
||||
NumBlocks = calculateNumBlocks(Size);
|
||||
if (!IsUsedForZeroCmp || NumBlocks != 1) {
|
||||
if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
|
||||
BasicBlock *StartBlock = CI->getParent();
|
||||
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
|
||||
setupEndBlockPHINodes();
|
||||
|
@ -2090,6 +2091,41 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
|
|||
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
|
||||
}
|
||||
|
||||
/// A memcmp expansion that only has one block of load and compare can bypass
|
||||
/// the compare, branch, and phi IR that is required in the general case.
|
||||
Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
|
||||
assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
|
||||
|
||||
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
|
||||
Value *Source1 = CI->getArgOperand(0);
|
||||
Value *Source2 = CI->getArgOperand(1);
|
||||
|
||||
// Cast source to LoadSizeType*.
|
||||
if (Source1->getType() != LoadSizeType)
|
||||
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
|
||||
if (Source2->getType() != LoadSizeType)
|
||||
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
|
||||
|
||||
// Load LoadSizeType from the base address.
|
||||
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
|
||||
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
|
||||
|
||||
if (DL.isLittleEndian() && Size != 1) {
|
||||
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
|
||||
Intrinsic::bswap, LoadSizeType);
|
||||
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
|
||||
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
|
||||
}
|
||||
|
||||
// TODO: Instead of comparing ULT, just subtract and return the difference?
|
||||
Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
|
||||
Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
|
||||
Type *I32 = Builder.getInt32Ty();
|
||||
Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
|
||||
ConstantInt::get(I32, 1));
|
||||
return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
|
||||
}
|
||||
|
||||
// This function expands the memcmp call into an inline expansion and returns
|
||||
// the memcmp result.
|
||||
Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
|
||||
|
@ -2097,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
|
|||
return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
|
||||
getMemCmpExpansionZeroCase(Size);
|
||||
|
||||
// TODO: Handle more than one load pair per block in getMemCmpOneBlock().
|
||||
if (NumBlocks == 1 && NumLoadsPerBlock == 1)
|
||||
return getMemCmpOneBlock(Size);
|
||||
|
||||
// This loop calls emitLoadCompareBlock for comparing Size bytes of the two
|
||||
// memcmp sources. It starts with loading using the maximum load size set by
|
||||
// the target. It processes any remaining bytes using a load size which is the
|
||||
|
|
|
@ -13,11 +13,10 @@ entry:
|
|||
; CHECK: ldbrx [[LOAD1:[0-9]+]]
|
||||
; CHECK-NEXT: ldbrx [[LOAD2:[0-9]+]]
|
||||
; CHECK-NEXT: li [[LI:[0-9]+]], 1
|
||||
; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: li [[LI2:[0-9]+]], -1
|
||||
; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
|
||||
; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
|
||||
; CHECK-NEXT: extsw 3, [[ISEL2]]
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
|
@ -34,11 +33,10 @@ entry:
|
|||
; CHECK: lwbrx [[LOAD1:[0-9]+]]
|
||||
; CHECK-NEXT: lwbrx [[LOAD2:[0-9]+]]
|
||||
; CHECK-NEXT: li [[LI:[0-9]+]], 1
|
||||
; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: li [[LI2:[0-9]+]], -1
|
||||
; CHECK-NEXT: cmplw [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
|
||||
; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
|
||||
; CHECK-NEXT: extsw 3, [[ISEL2]]
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
|
@ -55,11 +53,10 @@ entry:
|
|||
; CHECK: lhbrx [[LOAD1:[0-9]+]]
|
||||
; CHECK-NEXT: lhbrx [[LOAD2:[0-9]+]]
|
||||
; CHECK-NEXT: li [[LI:[0-9]+]], 1
|
||||
; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: li [[LI2:[0-9]+]], -1
|
||||
; CHECK-NEXT: cmplw [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
|
||||
; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
|
||||
; CHECK-NEXT: extsw 3, [[ISEL2]]
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
|
@ -75,8 +72,11 @@ entry:
|
|||
; CHECK-LABEL: @test4
|
||||
; CHECK: lbz [[LOAD1:[0-9]+]]
|
||||
; CHECK-NEXT: lbz [[LOAD2:[0-9]+]]
|
||||
; CHECK-NEXT: subf [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]]
|
||||
; CHECK-NEXT: extsw 3, [[SUB]]
|
||||
; CHECK-NEXT: li [[LI:[0-9]+]], 1
|
||||
; CHECK-NEXT: li [[LI2:[0-9]+]], -1
|
||||
; CHECK-NEXT: cmplw [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
|
||||
; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
|
|
|
@ -59,28 +59,20 @@ define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture reado
|
|||
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
|
||||
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
|
||||
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
|
||||
; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
|
||||
; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
|
||||
; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
|
||||
|
||||
; CHECK-LABEL: res_block:{{.*}}
|
||||
; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
|
||||
; CHECK-NEXT: br label %endblock
|
||||
; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[BSWAP1]], [[BSWAP2]]
|
||||
; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
|
||||
; CHECK-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
|
||||
; CHECK-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[SELECT2]]
|
||||
|
||||
; CHECK-BE-LABEL: @test2(
|
||||
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
|
||||
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
|
||||
; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
|
||||
; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
|
||||
; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
|
||||
; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
|
||||
|
||||
; CHECK-BE-LABEL: res_block:{{.*}}
|
||||
; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
|
||||
; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
|
||||
; CHECK-BE-NEXT: br label %endblock
|
||||
; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
|
||||
; CHECK-BE-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
|
||||
; CHECK-BE-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
|
||||
; CHECK-BE-NEXT: ret i32 [[SELECT2]]
|
||||
|
||||
entry:
|
||||
%0 = bitcast i32* %buffer1 to i8*
|
||||
|
|
|
@ -12,42 +12,39 @@ declare i32 @memcmp(i8*, i8*, i64)
|
|||
|
||||
define i32 @length2(i8* %X, i8* %Y) nounwind {
|
||||
; X32-LABEL: length2:
|
||||
; X32: # BB#0: # %loadbb
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movzwl (%ecx), %ecx
|
||||
; X32-NEXT: movzwl (%eax), %eax
|
||||
; X32-NEXT: rolw $8, %cx
|
||||
; X32-NEXT: rolw $8, %ax
|
||||
; X32-NEXT: movzwl %cx, %ecx
|
||||
; X32-NEXT: movzwl %ax, %edx
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: cmpl %edx, %ecx
|
||||
; X32-NEXT: je .LBB0_3
|
||||
; X32-NEXT: # BB#1: # %res_block
|
||||
; X32-NEXT: cmpw %ax, %cx
|
||||
; X32-NEXT: movl $-1, %eax
|
||||
; X32-NEXT: jb .LBB0_3
|
||||
; X32-NEXT: # BB#2: # %res_block
|
||||
; X32-NEXT: jae .LBB0_1
|
||||
; X32-NEXT: # BB#2:
|
||||
; X32-NEXT: je .LBB0_3
|
||||
; X32-NEXT: .LBB0_4:
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: .LBB0_1:
|
||||
; X32-NEXT: movl $1, %eax
|
||||
; X32-NEXT: .LBB0_3: # %endblock
|
||||
; X32-NEXT: jne .LBB0_4
|
||||
; X32-NEXT: .LBB0_3:
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length2:
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movzwl (%rdi), %eax
|
||||
; X64-NEXT: movzwl (%rsi), %ecx
|
||||
; X64-NEXT: rolw $8, %ax
|
||||
; X64-NEXT: rolw $8, %cx
|
||||
; X64-NEXT: movzwl %ax, %edx
|
||||
; X64-NEXT: movzwl %cx, %ecx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: cmpq %rcx, %rdx
|
||||
; X64-NEXT: je .LBB0_2
|
||||
; X64-NEXT: # BB#1: # %res_block
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: cmpw %cx, %ax
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: .LBB0_2: # %endblock
|
||||
; X64-NEXT: cmovel %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
|
||||
ret i32 %m
|
||||
|
@ -169,38 +166,39 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
|
|||
|
||||
define i32 @length4(i8* %X, i8* %Y) nounwind {
|
||||
; X32-LABEL: length4:
|
||||
; X32: # BB#0: # %loadbb
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl (%ecx), %ecx
|
||||
; X32-NEXT: movl (%eax), %edx
|
||||
; X32-NEXT: movl (%eax), %eax
|
||||
; X32-NEXT: bswapl %ecx
|
||||
; X32-NEXT: bswapl %edx
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: cmpl %edx, %ecx
|
||||
; X32-NEXT: je .LBB6_3
|
||||
; X32-NEXT: # BB#1: # %res_block
|
||||
; X32-NEXT: bswapl %eax
|
||||
; X32-NEXT: cmpl %eax, %ecx
|
||||
; X32-NEXT: movl $-1, %eax
|
||||
; X32-NEXT: jb .LBB6_3
|
||||
; X32-NEXT: # BB#2: # %res_block
|
||||
; X32-NEXT: jae .LBB6_1
|
||||
; X32-NEXT: # BB#2:
|
||||
; X32-NEXT: je .LBB6_3
|
||||
; X32-NEXT: .LBB6_4:
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: .LBB6_1:
|
||||
; X32-NEXT: movl $1, %eax
|
||||
; X32-NEXT: .LBB6_3: # %endblock
|
||||
; X32-NEXT: jne .LBB6_4
|
||||
; X32-NEXT: .LBB6_3:
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length4:
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: movl (%rsi), %edx
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: movl (%rsi), %ecx
|
||||
; X64-NEXT: bswapl %eax
|
||||
; X64-NEXT: bswapl %ecx
|
||||
; X64-NEXT: bswapl %edx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: cmpq %rdx, %rcx
|
||||
; X64-NEXT: je .LBB6_2
|
||||
; X64-NEXT: # BB#1: # %res_block
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: cmpl %ecx, %eax
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: .LBB6_2: # %endblock
|
||||
; X64-NEXT: cmovel %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
|
||||
ret i32 %m
|
||||
|
@ -303,19 +301,17 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
|
|||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length8:
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64-NEXT: movq (%rdi), %rcx
|
||||
; X64-NEXT: movq (%rsi), %rdx
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq (%rsi), %rcx
|
||||
; X64-NEXT: bswapq %rax
|
||||
; X64-NEXT: bswapq %rcx
|
||||
; X64-NEXT: bswapq %rdx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: cmpq %rdx, %rcx
|
||||
; X64-NEXT: je .LBB11_2
|
||||
; X64-NEXT: # BB#1: # %res_block
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: cmpq %rcx, %rax
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: .LBB11_2: # %endblock
|
||||
; X64-NEXT: cmovel %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
|
||||
ret i32 %m
|
||||
|
|
|
@ -4,45 +4,18 @@
|
|||
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
|
||||
|
||||
define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; X32-LABEL: @cmp2(
|
||||
; X32-NEXT: loadbb:
|
||||
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
|
||||
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
|
||||
; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
|
||||
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
|
||||
; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
|
||||
; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
|
||||
; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
|
||||
; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
|
||||
; X32-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
|
||||
; X32: res_block:
|
||||
; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
|
||||
; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
|
||||
; X32-NEXT: br label %endblock
|
||||
; X32: endblock:
|
||||
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
|
||||
; X32-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
; X64-LABEL: @cmp2(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
|
||||
; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
|
||||
; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
; ALL-LABEL: @cmp2(
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16*
|
||||
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
|
||||
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
|
||||
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
|
||||
; ALL-NEXT: [[TMP7:%.*]] = icmp ne i16 [[TMP5]], [[TMP6]]
|
||||
; ALL-NEXT: [[TMP8:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
|
||||
; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
|
||||
; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
|
||||
; ALL-NEXT: ret i32 [[TMP10]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
|
||||
ret i32 %call
|
||||
|
@ -58,43 +31,18 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
}
|
||||
|
||||
define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; X32-LABEL: @cmp4(
|
||||
; X32-NEXT: loadbb:
|
||||
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
|
||||
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
|
||||
; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
|
||||
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
|
||||
; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
|
||||
; X32-NEXT: br i1 [[TMP6]], label %endblock, label %res_block
|
||||
; X32: res_block:
|
||||
; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
|
||||
; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
|
||||
; X32-NEXT: br label %endblock
|
||||
; X32: endblock:
|
||||
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ]
|
||||
; X32-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
; X64-LABEL: @cmp4(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
|
||||
; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
|
||||
; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
; ALL-LABEL: @cmp4(
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32*
|
||||
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
|
||||
; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
|
||||
; ALL-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP5]], [[TMP6]]
|
||||
; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
|
||||
; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
|
||||
; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
|
||||
; ALL-NEXT: ret i32 [[TMP10]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||
ret i32 %call
|
||||
|
@ -133,22 +81,17 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X32-NEXT: ret i32 [[CALL]]
|
||||
;
|
||||
; X64-LABEL: @cmp8(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i64*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i64*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64*
|
||||
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64*
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
|
||||
; X64-NEXT: br i1 [[TMP6]], label %endblock, label %res_block
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
|
||||
; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
|
||||
; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]]
|
||||
; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
|
||||
; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
|
||||
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
|
||||
; X64-NEXT: ret i32 [[TMP10]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
ret i32 %call
|
||||
|
|
Loading…
Reference in New Issue