forked from OSchip/llvm-project
[PGO][PGSO] Add profile guided size optimization to X86ISelDAGToDAG.
Differential Revision: https://reviews.llvm.org/D83331
This commit is contained in:
parent
44899ed659
commit
fb558ccae7
|
@ -160,10 +160,6 @@ namespace {
|
|||
/// make the right decision when generating code for different targets.
|
||||
const X86Subtarget *Subtarget;
|
||||
|
||||
/// If true, selector should try to optimize for code size instead of
|
||||
/// performance.
|
||||
bool OptForSize;
|
||||
|
||||
/// If true, selector should try to optimize for minimum code size.
|
||||
bool OptForMinSize;
|
||||
|
||||
|
@ -172,7 +168,7 @@ namespace {
|
|||
|
||||
public:
|
||||
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
|
||||
: SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
|
||||
: SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
|
||||
OptForMinSize(false), IndirectTlsSegRefs(false) {}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
|
@ -186,7 +182,7 @@ namespace {
|
|||
"indirect-tls-seg-refs");
|
||||
|
||||
// OptFor[Min]Size are used in pattern predicates that isel is matching.
|
||||
OptForSize = MF.getFunction().hasOptSize();
|
||||
bool OptForSize = MF.getFunction().hasOptSize();
|
||||
OptForMinSize = MF.getFunction().hasMinSize();
|
||||
assert((!OptForMinSize || OptForSize) &&
|
||||
"OptForMinSize implies OptForSize");
|
||||
|
@ -4557,7 +4553,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
// the patterns on the add/sub/and/or/xor with immediate patterns in the
|
||||
// tablegen files to check immediate use count without making the patterns
|
||||
// unavailable to the fast-isel table.
|
||||
if (!OptForSize)
|
||||
if (!CurDAG->shouldOptForSize())
|
||||
break;
|
||||
|
||||
// Only handle i8/i16/i32/i64.
|
||||
|
|
|
@ -1034,8 +1034,454 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
|
|||
ret i128 %cnt
|
||||
}
|
||||
|
||||
define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
|
||||
; X32-LABEL: cnt32_pgso:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
; X32-NEXT: shrl %ecx
|
||||
; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X32-NEXT: subl %ecx, %eax
|
||||
; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333
|
||||
; X32-NEXT: movl %eax, %edx
|
||||
; X32-NEXT: andl %ecx, %edx
|
||||
; X32-NEXT: shrl $2, %eax
|
||||
; X32-NEXT: andl %ecx, %eax
|
||||
; X32-NEXT: addl %edx, %eax
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
; X32-NEXT: shrl $4, %ecx
|
||||
; X32-NEXT: addl %eax, %ecx
|
||||
; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
|
||||
; X32-NEXT: shrl $24, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: cnt32_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shrl %eax
|
||||
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: movl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: movl %edi, %ecx
|
||||
; X64-NEXT: andl %eax, %ecx
|
||||
; X64-NEXT: shrl $2, %edi
|
||||
; X64-NEXT: andl %eax, %edi
|
||||
; X64-NEXT: addl %ecx, %edi
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shrl $4, %eax
|
||||
; X64-NEXT: addl %edi, %eax
|
||||
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
|
||||
; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
|
||||
; X64-NEXT: shrl $24, %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-POPCNT-LABEL: cnt32_pgso:
|
||||
; X32-POPCNT: # %bb.0:
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
|
||||
; X32-POPCNT-NEXT: retl
|
||||
;
|
||||
; X64-POPCNT-LABEL: cnt32_pgso:
|
||||
; X64-POPCNT: # %bb.0:
|
||||
; X64-POPCNT-NEXT: popcntl %edi, %eax
|
||||
; X64-POPCNT-NEXT: retq
|
||||
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
|
||||
ret i32 %cnt
|
||||
}
|
||||
|
||||
define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
|
||||
; X32-NOSSE-LABEL: cnt64_pgso:
|
||||
; X32-NOSSE: # %bb.0:
|
||||
; X32-NOSSE-NEXT: pushl %ebx
|
||||
; X32-NOSSE-NEXT: pushl %edi
|
||||
; X32-NOSSE-NEXT: pushl %esi
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NOSSE-NEXT: movl %ecx, %edx
|
||||
; X32-NOSSE-NEXT: shrl %edx
|
||||
; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
|
||||
; X32-NOSSE-NEXT: andl %esi, %edx
|
||||
; X32-NOSSE-NEXT: subl %edx, %ecx
|
||||
; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
|
||||
; X32-NOSSE-NEXT: movl %ecx, %edi
|
||||
; X32-NOSSE-NEXT: andl %edx, %edi
|
||||
; X32-NOSSE-NEXT: shrl $2, %ecx
|
||||
; X32-NOSSE-NEXT: andl %edx, %ecx
|
||||
; X32-NOSSE-NEXT: addl %edi, %ecx
|
||||
; X32-NOSSE-NEXT: movl %ecx, %edi
|
||||
; X32-NOSSE-NEXT: shrl $4, %edi
|
||||
; X32-NOSSE-NEXT: addl %ecx, %edi
|
||||
; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X32-NOSSE-NEXT: andl %ecx, %edi
|
||||
; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %edi
|
||||
; X32-NOSSE-NEXT: movl %eax, %ebx
|
||||
; X32-NOSSE-NEXT: shrl %ebx
|
||||
; X32-NOSSE-NEXT: andl %esi, %ebx
|
||||
; X32-NOSSE-NEXT: subl %ebx, %eax
|
||||
; X32-NOSSE-NEXT: movl %eax, %esi
|
||||
; X32-NOSSE-NEXT: andl %edx, %esi
|
||||
; X32-NOSSE-NEXT: shrl $2, %eax
|
||||
; X32-NOSSE-NEXT: andl %edx, %eax
|
||||
; X32-NOSSE-NEXT: addl %esi, %eax
|
||||
; X32-NOSSE-NEXT: movl %eax, %edx
|
||||
; X32-NOSSE-NEXT: shrl $4, %edx
|
||||
; X32-NOSSE-NEXT: addl %eax, %edx
|
||||
; X32-NOSSE-NEXT: andl %ecx, %edx
|
||||
; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %eax
|
||||
; X32-NOSSE-NEXT: addl %edi, %eax
|
||||
; X32-NOSSE-NEXT: xorl %edx, %edx
|
||||
; X32-NOSSE-NEXT: popl %esi
|
||||
; X32-NOSSE-NEXT: popl %edi
|
||||
; X32-NOSSE-NEXT: popl %ebx
|
||||
; X32-NOSSE-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: cnt64_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
|
||||
; X64-NEXT: andq %rax, %rcx
|
||||
; X64-NEXT: subq %rcx, %rdi
|
||||
; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
|
||||
; X64-NEXT: movq %rdi, %rcx
|
||||
; X64-NEXT: andq %rax, %rcx
|
||||
; X64-NEXT: shrq $2, %rdi
|
||||
; X64-NEXT: andq %rax, %rdi
|
||||
; X64-NEXT: addq %rcx, %rdi
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shrq $4, %rax
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
|
||||
; X64-NEXT: andq %rax, %rcx
|
||||
; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
|
||||
; X64-NEXT: imulq %rcx, %rax
|
||||
; X64-NEXT: shrq $56, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-POPCNT-LABEL: cnt64_pgso:
|
||||
; X32-POPCNT: # %bb.0:
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
|
||||
; X32-POPCNT-NEXT: addl %ecx, %eax
|
||||
; X32-POPCNT-NEXT: xorl %edx, %edx
|
||||
; X32-POPCNT-NEXT: retl
|
||||
;
|
||||
; X64-POPCNT-LABEL: cnt64_pgso:
|
||||
; X64-POPCNT: # %bb.0:
|
||||
; X64-POPCNT-NEXT: popcntq %rdi, %rax
|
||||
; X64-POPCNT-NEXT: retq
|
||||
;
|
||||
; X32-SSE2-LABEL: cnt64_pgso:
|
||||
; X32-SSE2: # %bb.0:
|
||||
; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: psrlw $1, %xmm1
|
||||
; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
|
||||
; X32-SSE2-NEXT: psubb %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrlw $2, %xmm0
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: paddb %xmm2, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: psrlw $4, %xmm1
|
||||
; X32-SSE2-NEXT: paddb %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
|
||||
; X32-SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: movd %xmm0, %eax
|
||||
; X32-SSE2-NEXT: xorl %edx, %edx
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X32-SSSE3-LABEL: cnt64_pgso:
|
||||
; X32-SSSE3: # %bb.0:
|
||||
; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm2
|
||||
; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
|
||||
; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
|
||||
; X32-SSSE3-NEXT: psrlw $4, %xmm1
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm1
|
||||
; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
|
||||
; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
|
||||
; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
|
||||
; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
|
||||
; X32-SSSE3-NEXT: movd %xmm0, %eax
|
||||
; X32-SSSE3-NEXT: xorl %edx, %edx
|
||||
; X32-SSSE3-NEXT: retl
|
||||
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
|
||||
ret i64 %cnt
|
||||
}
|
||||
|
||||
define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
|
||||
; X32-NOSSE-LABEL: cnt128_pgso:
|
||||
; X32-NOSSE: # %bb.0:
|
||||
; X32-NOSSE-NEXT: pushl %ebp
|
||||
; X32-NOSSE-NEXT: pushl %ebx
|
||||
; X32-NOSSE-NEXT: pushl %edi
|
||||
; X32-NOSSE-NEXT: pushl %esi
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X32-NOSSE-NEXT: movl %ebx, %ecx
|
||||
; X32-NOSSE-NEXT: shrl %ecx
|
||||
; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
|
||||
; X32-NOSSE-NEXT: andl %edi, %ecx
|
||||
; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
|
||||
; X32-NOSSE-NEXT: subl %ecx, %ebx
|
||||
; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
|
||||
; X32-NOSSE-NEXT: movl %ebx, %ebp
|
||||
; X32-NOSSE-NEXT: andl %ecx, %ebp
|
||||
; X32-NOSSE-NEXT: shrl $2, %ebx
|
||||
; X32-NOSSE-NEXT: andl %ecx, %ebx
|
||||
; X32-NOSSE-NEXT: addl %ebp, %ebx
|
||||
; X32-NOSSE-NEXT: movl %ebx, %ebp
|
||||
; X32-NOSSE-NEXT: shrl $4, %ebp
|
||||
; X32-NOSSE-NEXT: addl %ebx, %ebp
|
||||
; X32-NOSSE-NEXT: movl %eax, %ebx
|
||||
; X32-NOSSE-NEXT: shrl %ebx
|
||||
; X32-NOSSE-NEXT: andl %edi, %ebx
|
||||
; X32-NOSSE-NEXT: subl %ebx, %eax
|
||||
; X32-NOSSE-NEXT: movl %eax, %ebx
|
||||
; X32-NOSSE-NEXT: andl %ecx, %ebx
|
||||
; X32-NOSSE-NEXT: shrl $2, %eax
|
||||
; X32-NOSSE-NEXT: andl %ecx, %eax
|
||||
; X32-NOSSE-NEXT: addl %ebx, %eax
|
||||
; X32-NOSSE-NEXT: movl %eax, %edi
|
||||
; X32-NOSSE-NEXT: shrl $4, %edi
|
||||
; X32-NOSSE-NEXT: addl %eax, %edi
|
||||
; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
|
||||
; X32-NOSSE-NEXT: andl %ebx, %ebp
|
||||
; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %eax
|
||||
; X32-NOSSE-NEXT: andl %ebx, %edi
|
||||
; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %edi
|
||||
; X32-NOSSE-NEXT: addl %eax, %edi
|
||||
; X32-NOSSE-NEXT: movl %esi, %eax
|
||||
; X32-NOSSE-NEXT: shrl %eax
|
||||
; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
|
||||
; X32-NOSSE-NEXT: andl %ebp, %eax
|
||||
; X32-NOSSE-NEXT: subl %eax, %esi
|
||||
; X32-NOSSE-NEXT: movl %esi, %eax
|
||||
; X32-NOSSE-NEXT: andl %ecx, %eax
|
||||
; X32-NOSSE-NEXT: shrl $2, %esi
|
||||
; X32-NOSSE-NEXT: andl %ecx, %esi
|
||||
; X32-NOSSE-NEXT: addl %eax, %esi
|
||||
; X32-NOSSE-NEXT: movl %esi, %eax
|
||||
; X32-NOSSE-NEXT: shrl $4, %eax
|
||||
; X32-NOSSE-NEXT: addl %esi, %eax
|
||||
; X32-NOSSE-NEXT: movl %edx, %esi
|
||||
; X32-NOSSE-NEXT: shrl %esi
|
||||
; X32-NOSSE-NEXT: andl %ebp, %esi
|
||||
; X32-NOSSE-NEXT: subl %esi, %edx
|
||||
; X32-NOSSE-NEXT: movl %edx, %esi
|
||||
; X32-NOSSE-NEXT: andl %ecx, %esi
|
||||
; X32-NOSSE-NEXT: shrl $2, %edx
|
||||
; X32-NOSSE-NEXT: andl %ecx, %edx
|
||||
; X32-NOSSE-NEXT: addl %esi, %edx
|
||||
; X32-NOSSE-NEXT: movl %edx, %ecx
|
||||
; X32-NOSSE-NEXT: shrl $4, %ecx
|
||||
; X32-NOSSE-NEXT: addl %edx, %ecx
|
||||
; X32-NOSSE-NEXT: andl %ebx, %eax
|
||||
; X32-NOSSE-NEXT: andl %ebx, %ecx
|
||||
; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %eax
|
||||
; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
|
||||
; X32-NOSSE-NEXT: shrl $24, %ecx
|
||||
; X32-NOSSE-NEXT: addl %eax, %ecx
|
||||
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NOSSE-NEXT: addl %edi, %ecx
|
||||
; X32-NOSSE-NEXT: xorl %edx, %edx
|
||||
; X32-NOSSE-NEXT: movl %edx, 12(%eax)
|
||||
; X32-NOSSE-NEXT: movl %edx, 8(%eax)
|
||||
; X32-NOSSE-NEXT: movl %edx, 4(%eax)
|
||||
; X32-NOSSE-NEXT: movl %ecx, (%eax)
|
||||
; X32-NOSSE-NEXT: popl %esi
|
||||
; X32-NOSSE-NEXT: popl %edi
|
||||
; X32-NOSSE-NEXT: popl %ebx
|
||||
; X32-NOSSE-NEXT: popl %ebp
|
||||
; X32-NOSSE-NEXT: retl $4
|
||||
;
|
||||
; X64-LABEL: cnt128_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rsi, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
|
||||
; X64-NEXT: andq %r8, %rax
|
||||
; X64-NEXT: subq %rax, %rsi
|
||||
; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
|
||||
; X64-NEXT: movq %rsi, %rcx
|
||||
; X64-NEXT: andq %rax, %rcx
|
||||
; X64-NEXT: shrq $2, %rsi
|
||||
; X64-NEXT: andq %rax, %rsi
|
||||
; X64-NEXT: addq %rcx, %rsi
|
||||
; X64-NEXT: movq %rsi, %rcx
|
||||
; X64-NEXT: shrq $4, %rcx
|
||||
; X64-NEXT: addq %rsi, %rcx
|
||||
; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
|
||||
; X64-NEXT: andq %r9, %rcx
|
||||
; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101
|
||||
; X64-NEXT: imulq %rdx, %rcx
|
||||
; X64-NEXT: shrq $56, %rcx
|
||||
; X64-NEXT: movq %rdi, %rsi
|
||||
; X64-NEXT: shrq %rsi
|
||||
; X64-NEXT: andq %r8, %rsi
|
||||
; X64-NEXT: subq %rsi, %rdi
|
||||
; X64-NEXT: movq %rdi, %rsi
|
||||
; X64-NEXT: andq %rax, %rsi
|
||||
; X64-NEXT: shrq $2, %rdi
|
||||
; X64-NEXT: andq %rax, %rdi
|
||||
; X64-NEXT: addq %rsi, %rdi
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shrq $4, %rax
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: andq %r9, %rax
|
||||
; X64-NEXT: imulq %rdx, %rax
|
||||
; X64-NEXT: shrq $56, %rax
|
||||
; X64-NEXT: addq %rcx, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-POPCNT-LABEL: cnt128_pgso:
|
||||
; X32-POPCNT: # %bb.0:
|
||||
; X32-POPCNT-NEXT: pushl %esi
|
||||
; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
|
||||
; X32-POPCNT-NEXT: addl %ecx, %edx
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
|
||||
; X32-POPCNT-NEXT: addl %ecx, %esi
|
||||
; X32-POPCNT-NEXT: addl %edx, %esi
|
||||
; X32-POPCNT-NEXT: xorl %ecx, %ecx
|
||||
; X32-POPCNT-NEXT: movl %ecx, 12(%eax)
|
||||
; X32-POPCNT-NEXT: movl %ecx, 8(%eax)
|
||||
; X32-POPCNT-NEXT: movl %ecx, 4(%eax)
|
||||
; X32-POPCNT-NEXT: movl %esi, (%eax)
|
||||
; X32-POPCNT-NEXT: popl %esi
|
||||
; X32-POPCNT-NEXT: retl $4
|
||||
;
|
||||
; X64-POPCNT-LABEL: cnt128_pgso:
|
||||
; X64-POPCNT: # %bb.0:
|
||||
; X64-POPCNT-NEXT: popcntq %rsi, %rcx
|
||||
; X64-POPCNT-NEXT: popcntq %rdi, %rax
|
||||
; X64-POPCNT-NEXT: addq %rcx, %rax
|
||||
; X64-POPCNT-NEXT: xorl %edx, %edx
|
||||
; X64-POPCNT-NEXT: retq
|
||||
;
|
||||
; X32-SSE2-LABEL: cnt128_pgso:
|
||||
; X32-SSE2: # %bb.0:
|
||||
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: psrlw $1, %xmm1
|
||||
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
|
||||
; X32-SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psubb %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm3
|
||||
; X32-SSE2-NEXT: psrlw $2, %xmm0
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: paddb %xmm3, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X32-SSE2-NEXT: psrlw $4, %xmm3
|
||||
; X32-SSE2-NEXT: paddb %xmm0, %xmm3
|
||||
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; X32-SSE2-NEXT: pand %xmm0, %xmm3
|
||||
; X32-SSE2-NEXT: pxor %xmm4, %xmm4
|
||||
; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
|
||||
; X32-SSE2-NEXT: movd %xmm3, %ecx
|
||||
; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
|
||||
; X32-SSE2-NEXT: psrlw $1, %xmm5
|
||||
; X32-SSE2-NEXT: pand %xmm2, %xmm5
|
||||
; X32-SSE2-NEXT: psubb %xmm5, %xmm3
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrlw $2, %xmm3
|
||||
; X32-SSE2-NEXT: pand %xmm1, %xmm3
|
||||
; X32-SSE2-NEXT: paddb %xmm2, %xmm3
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
|
||||
; X32-SSE2-NEXT: psrlw $4, %xmm1
|
||||
; X32-SSE2-NEXT: paddb %xmm3, %xmm1
|
||||
; X32-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
|
||||
; X32-SSE2-NEXT: movd %xmm1, %edx
|
||||
; X32-SSE2-NEXT: addl %ecx, %edx
|
||||
; X32-SSE2-NEXT: xorl %ecx, %ecx
|
||||
; X32-SSE2-NEXT: movl %ecx, 12(%eax)
|
||||
; X32-SSE2-NEXT: movl %ecx, 8(%eax)
|
||||
; X32-SSE2-NEXT: movl %ecx, 4(%eax)
|
||||
; X32-SSE2-NEXT: movl %edx, (%eax)
|
||||
; X32-SSE2-NEXT: retl $4
|
||||
;
|
||||
; X32-SSSE3-LABEL: cnt128_pgso:
|
||||
; X32-SSSE3: # %bb.0:
|
||||
; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm2
|
||||
; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
|
||||
; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
|
||||
; X32-SSSE3-NEXT: psrlw $4, %xmm1
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm1
|
||||
; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
|
||||
; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
|
||||
; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
|
||||
; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
|
||||
; X32-SSSE3-NEXT: movd %xmm2, %ecx
|
||||
; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
|
||||
; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm4
|
||||
; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
|
||||
; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
|
||||
; X32-SSSE3-NEXT: psrlw $4, %xmm2
|
||||
; X32-SSSE3-NEXT: pand %xmm0, %xmm2
|
||||
; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
|
||||
; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
|
||||
; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
|
||||
; X32-SSSE3-NEXT: movd %xmm3, %edx
|
||||
; X32-SSSE3-NEXT: addl %ecx, %edx
|
||||
; X32-SSSE3-NEXT: xorl %ecx, %ecx
|
||||
; X32-SSSE3-NEXT: movl %ecx, 12(%eax)
|
||||
; X32-SSSE3-NEXT: movl %ecx, 8(%eax)
|
||||
; X32-SSSE3-NEXT: movl %ecx, 4(%eax)
|
||||
; X32-SSSE3-NEXT: movl %edx, (%eax)
|
||||
; X32-SSSE3-NEXT: retl $4
|
||||
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
|
||||
ret i128 %cnt
|
||||
}
|
||||
|
||||
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
|
||||
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
declare i128 @llvm.ctpop.i128(i128) nounwind readnone
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
|
@ -14,6 +14,19 @@ define i1 @foo(i32 %i) optsize {
|
|||
ret i1 %cmp
|
||||
}
|
||||
|
||||
define i1 @foo_pgso(i32 %i) !prof !14 {
|
||||
; CHECK-LABEL: foo_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl $305419896, %eax # imm = 0x12345678
|
||||
; CHECK-NEXT: andl %eax, %edi
|
||||
; CHECK-NEXT: cmpl %eax, %edi
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %i, 305419896
|
||||
%cmp = icmp eq i32 %and, 305419896
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; 8-bit ALU immediates probably have small encodings.
|
||||
; We do not want to hoist the constant into a register here.
|
||||
|
||||
|
@ -52,3 +65,20 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
|
|||
%or4 = or i64 %or, %shl
|
||||
ret i64 %or4
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
Loading…
Reference in New Issue