[CGP] use subtract or subtract-of-cmps for result of memcmp expansion
As noted in the code comment, transforming this in the other direction might require a separate transform here in CGP given the block-at-a-time DAG constraint. Besides that theoretical motivation, there are 2 practical motivations for the subtract-of-cmps form:

1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still). There is discussion about canonicalizing IR to the select form (http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html), so we probably need to add DAG transforms for those patterns anyway, but this improves the memcmp output without waiting for that step.

2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided if we choose to enable that.

Differential Revision: https://reviews.llvm.org/D34904

llvm-svn: 309597
commit fea731a4aa (parent 70d35e102e)
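To make the shape of the new expansion concrete before reading the diff, here is a small scalar C++ model of what getMemCmpOneBlock now produces; the function and variable names below are our own illustration, not code from the patch. Chunks smaller than 4 bytes are zero-extended and subtracted directly, while wider chunks subtract the two zero-extended unsigned-compare bits.

#include <cstdint>

// Illustrative model only: 'Size' is the memcmp length, and 'a'/'b' stand for
// the loaded chunks (already byte-swapped on little-endian targets).
int memcmpOneBlockModel(unsigned Size, uint64_t a, uint64_t b) {
  if (Size < 4) {
    // i8/i16 chunks: the zero-extended values fit in i32, so their difference
    // is already the required negative, zero, or positive result.
    return static_cast<int32_t>(a) - static_cast<int32_t>(b);
  }
  // Wider chunks: subtract the two zero-extended unsigned compare bits,
  // sub(ugt, ult), which yields -1, 0, or 1 without a select.
  return static_cast<int>(a > b) - static_cast<int>(a < b);
}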
@@ -2117,13 +2117,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
  }

  // TODO: Instead of comparing ULT, just subtract and return the difference?
  Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
  if (Size < 4) {
    // The i8 and i16 cases don't need compares. We zext the loaded values and
    // subtract them to get the suitable negative, zero, or positive i32 result.
    LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
    LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
    return Builder.CreateSub(LoadSrc1, LoadSrc2);
  }

  // The result of memcmp is negative, zero, or positive, so produce that by
  // subtracting 2 extended compare bits: sub (ugt, ult).
  // If a target prefers to use selects to get -1/0/1, they should be able
  // to transform this later. The inverse transform (going from selects to math)
  // may not be possible in the DAG because the selects got converted into
  // branches before we got there.
  Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
  Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
  Type *I32 = Builder.getInt32Ty();
  Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
                                     ConstantInt::get(I32, 1));
  return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
  return Builder.CreateSub(ZextUGT, ZextULT);
}

// This function expands the memcmp call into an inline expansion and returns
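The comment above claims that a target preferring selects for -1/0/1 can still recover them later, i.e. the subtract-of-cmps form and the old select form compute the same value. As a quick standalone illustration of that equivalence (our own check, not part of this commit), the following C++ program verifies it exhaustively for all pairs of 8-bit unsigned operands.

#include <cassert>

int main() {
  for (unsigned a = 0; a <= 0xFF; ++a) {
    for (unsigned b = 0; b <= 0xFF; ++b) {
      // New form: sub(zext(icmp ugt), zext(icmp ult)).
      int SubOfCmps = static_cast<int>(a > b) - static_cast<int>(a < b);
      // Old form: select(icmp ne, select(icmp ult, -1, 1), 0).
      int Selects = (a != b) ? ((a < b) ? -1 : 1) : 0;
      assert(SubOfCmps == Selects);
    }
  }
  return 0;
}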
@@ -6,11 +6,13 @@ define signext i32 @memcmp8(i32* nocapture readonly %buffer1, i32* nocapture rea
; CHECK: # BB#0:
; CHECK-NEXT: ldbrx 3, 0, 3
; CHECK-NEXT: ldbrx 4, 0, 4
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: li 12, -1
; CHECK-NEXT: li 5, 0
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: isel 3, 12, 5, 0
; CHECK-NEXT: isel 3, 0, 3, 2
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: isel 4, 3, 5, 1
; CHECK-NEXT: isel 3, 3, 5, 0
; CHECK-NEXT: subf 3, 3, 4
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
  %t0 = bitcast i32* %buffer1 to i8*
  %t1 = bitcast i32* %buffer2 to i8*
@@ -23,11 +25,12 @@ define signext i32 @memcmp4(i32* nocapture readonly %buffer1, i32* nocapture rea
; CHECK: # BB#0:
; CHECK-NEXT: lwbrx 3, 0, 3
; CHECK-NEXT: lwbrx 4, 0, 4
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: li 12, -1
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: isel 3, 12, 5, 0
; CHECK-NEXT: isel 3, 0, 3, 2
; CHECK-NEXT: sub 5, 4, 3
; CHECK-NEXT: sub 3, 3, 4
; CHECK-NEXT: rldicl 4, 5, 1, 63
; CHECK-NEXT: rldicl 3, 3, 1, 63
; CHECK-NEXT: subf 3, 3, 4
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
  %t0 = bitcast i32* %buffer1 to i8*
  %t1 = bitcast i32* %buffer2 to i8*
@@ -40,11 +43,8 @@ define signext i32 @memcmp2(i32* nocapture readonly %buffer1, i32* nocapture rea
; CHECK: # BB#0:
; CHECK-NEXT: lhbrx 3, 0, 3
; CHECK-NEXT: lhbrx 4, 0, 4
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: li 12, -1
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: isel 3, 12, 5, 0
; CHECK-NEXT: isel 3, 0, 3, 2
; CHECK-NEXT: subf 3, 4, 3
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
  %t0 = bitcast i32* %buffer1 to i8*
  %t1 = bitcast i32* %buffer2 to i8*
@@ -57,11 +57,8 @@ define signext i32 @memcmp1(i32* nocapture readonly %buffer1, i32* nocapture rea
; CHECK: # BB#0:
; CHECK-NEXT: lbz 3, 0(3)
; CHECK-NEXT: lbz 4, 0(4)
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: li 12, -1
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: isel 3, 12, 5, 0
; CHECK-NEXT: isel 3, 0, 3, 2
; CHECK-NEXT: subf 3, 4, 3
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
  %t0 = bitcast i32* %buffer1 to i8*
  %t1 = bitcast i32* %buffer2 to i8*
@@ -59,20 +59,22 @@ define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture reado
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[BSWAP1]], [[BSWAP2]]
; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
; CHECK-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
; CHECK-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
; CHECK-NEXT: ret i32 [[SELECT2]]
; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
; CHECK-NEXT: ret i32 [[SUB]]

; CHECK-BE-LABEL: @test2(
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[LOAD1]], [[LOAD2]]
; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
; CHECK-BE-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
; CHECK-BE-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
; CHECK-BE-NEXT: ret i32 [[SELECT2]]
; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
; CHECK-BE-NEXT: ret i32 [[SUB]]

entry:
  %0 = bitcast i32* %buffer1 to i8*
@@ -14,24 +14,15 @@ declare i32 @memcmp(i8*, i8*, i64)
define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # BB#0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: incl %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpw %dx, %cx
; X86-NEXT: cmovael %edi, %eax
; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
@@ -40,12 +31,9 @@ define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
@@ -218,38 +206,28 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:
; X86: # BB#0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: incl %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: cmovael %edi, %eax
; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
@@ -419,16 +397,14 @@ define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
@@ -17,15 +17,12 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpw %ax, %cx
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: cmovbl %ecx, %eax
; X86-NEXT: cmovel %edx, %eax
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
@@ -34,12 +31,9 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
@@ -211,29 +205,25 @@ define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: cmovbl %ecx, %eax
; X86-NEXT: cmovel %edx, %eax
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
@@ -399,16 +389,14 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
;
; X64-LABEL: length8:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
@@ -12,11 +12,10 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = icmp ne i16 [[TMP5]], [[TMP6]]
; ALL-NEXT: [[TMP8:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
; ALL-NEXT: ret i32 [[TMP10]]
; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
; ALL-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
; ALL-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
; ALL-NEXT: ret i32 [[TMP9]]
;
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
  ret i32 %call
@@ -93,11 +92,12 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP5]], [[TMP6]]
; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
; ALL-NEXT: ret i32 [[TMP10]]
; ALL-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
; ALL-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
; ALL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
; ALL-NEXT: ret i32 [[TMP11]]
;
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
  ret i32 %call
@@ -285,11 +285,12 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]]
; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
; X64-NEXT: ret i32 [[TMP10]]
; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
; X64-NEXT: ret i32 [[TMP11]]
;
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
  ret i32 %call