; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4

; TBM BEXTRI, 32-bit: (x >> 4) & 4095 extracts a 12-bit field starting at
; bit 4. The checks expect it to be selected as `bextrl $3076` (0xC04: start
; bit 4 in the low byte, length 12 in the next byte) both for the register
; argument %a0 and for the value loaded from %p1. The two results are added
; to form the return value, so both computations are used.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT: # sched: [2:1.00]
; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT: # sched: [7:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_bextri_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER2-NEXT: # sched: [6:0.50]
; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER2-NEXT: # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_bextri_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_bextri_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = lshr i32 %a0, 4           ; reg: drop the 4 bits below the field
  %m0 = lshr i32 %a1, 4           ; mem: drop the 4 bits below the field
  %r1 = and i32 %r0, 4095         ; reg: keep 12 bits
  %m1 = and i32 %m0, 4095         ; mem: keep 12 bits
  %res = add i32 %r1, %m1
  ret i32 %res
}

; TBM BEXTRI, 64-bit: (x >> 4) & 4095 on the register argument %a0 and on the
; value loaded from %p1; the two results are added to form the return value.
; Note that the checks expect the 32-bit `bextrl $3076` (0xC04) form with
; %edi / %eax / %ecx even though the IR type is i64, followed by a 64-bit
; `addq` (presumably because the extracted field, bits 4..15, lies entirely
; in the low 32 bits -- confirm against the selection patterns).
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT: # sched: [2:1.00]
; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT: # sched: [7:1.00]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_bextri_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER2-NEXT: # sched: [6:0.50]
; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER2-NEXT: # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_bextri_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_bextri_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = lshr i64 %a0, 4           ; reg: drop the 4 bits below the field
  %m0 = lshr i64 %a1, 4           ; mem: drop the 4 bits below the field
  %r1 = and i64 %r0, 4095         ; reg: keep 12 bits
  %m1 = and i64 %m0, 4095         ; mem: keep 12 bits
  %res = add i64 %r1, %m1
  ret i64 %res
}

; TBM BLCFILL, 32-bit: x & (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcfilll`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcfill_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcfill_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcfilll %edi, %ecx
; BDVER3-NEXT: blcfilll (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcfill_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcfilll %edi, %ecx
; BDVER4-NEXT: blcfilll (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = add i32 %a0, 1
  %m0 = add i32 %a1, 1
  %r1 = and i32 %r0, %a0          ; reg: x & (x + 1)
  %m1 = and i32 %m0, %a1          ; mem: x & (x + 1)
  %res = add i32 %r1, %m1
  ret i32 %res
}

; TBM BLCFILL, 64-bit: x & (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcfillq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcfill_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcfill_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcfillq %rdi, %rcx
; BDVER3-NEXT: blcfillq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcfill_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcfillq %rdi, %rcx
; BDVER4-NEXT: blcfillq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = add i64 %a0, 1
  %m0 = add i64 %a1, 1
  %r1 = and i64 %r0, %a0          ; reg: x & (x + 1)
  %m1 = and i64 %m0, %a1          ; mem: x & (x + 1)
  %res = add i64 %r1, %m1
  ret i64 %res
}

; TBM BLCI, 32-bit: x | ~(x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcil`. The IR writes the increment with the
; constant first (`add i32 1, %x`). The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcil %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blci_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blcil %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blci_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcil %edi, %ecx
; BDVER3-NEXT: blcil (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blci_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcil %edi, %ecx
; BDVER4-NEXT: blcil (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = add i32 1, %a0
  %m0 = add i32 1, %a1
  %r1 = xor i32 %r0, -1           ; reg: ~(x + 1)
  %m1 = xor i32 %m0, -1           ; mem: ~(x + 1)
  %r2 = or i32 %r1, %a0           ; reg: x | ~(x + 1)
  %m2 = or i32 %m1, %a1           ; mem: x | ~(x + 1)
  %res = add i32 %r2, %m2
  ret i32 %res
}

; TBM BLCI, 64-bit: x | ~(x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blciq`. The IR writes the increment with the
; constant first (`add i64 1, %x`). The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blci_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blciq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blci_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blciq %rdi, %rcx
; BDVER3-NEXT: blciq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blci_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blciq %rdi, %rcx
; BDVER4-NEXT: blciq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = add i64 1, %a0
  %m0 = add i64 1, %a1
  %r1 = xor i64 %r0, -1           ; reg: ~(x + 1)
  %m1 = xor i64 %m0, -1           ; mem: ~(x + 1)
  %r2 = or i64 %r1, %a0           ; reg: x | ~(x + 1)
  %m2 = or i64 %m1, %a1           ; mem: x | ~(x + 1)
  %res = add i64 %r2, %m2
  ret i64 %res
}

; TBM BLCIC, 32-bit: ~x & (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcicl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcic_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blcicl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcic_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcicl %edi, %ecx
; BDVER3-NEXT: blcicl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcic_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcicl %edi, %ecx
; BDVER4-NEXT: blcicl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = xor i32 %a0, -1           ; reg: ~x
  %m0 = xor i32 %a1, -1           ; mem: ~x
  %r1 = add i32 %a0, 1
  %m1 = add i32 %a1, 1
  %r2 = and i32 %r1, %r0          ; reg: (x + 1) & ~x
  %m2 = and i32 %m1, %m0          ; mem: (x + 1) & ~x
  %res = add i32 %r2, %m2
  ret i32 %res
}

; TBM BLCIC, 64-bit: ~x & (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcicq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcic_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcic_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcicq %rdi, %rcx
; BDVER3-NEXT: blcicq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcic_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcicq %rdi, %rcx
; BDVER4-NEXT: blcicq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = xor i64 %a0, -1           ; reg: ~x
  %m0 = xor i64 %a1, -1           ; mem: ~x
  %r1 = add i64 %a0, 1
  %m1 = add i64 %a1, 1
  %r2 = and i64 %r1, %r0          ; reg: (x + 1) & ~x
  %m2 = and i64 %m1, %m0          ; mem: (x + 1) & ~x
  %res = add i64 %r2, %m2
  ret i64 %res
}

; TBM BLCMSK, 32-bit: x ^ (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcmskl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcmskl %edi, %ecx
; BDVER3-NEXT: blcmskl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcmskl %edi, %ecx
; BDVER4-NEXT: blcmskl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = add i32 %a0, 1
  %m0 = add i32 %a1, 1
  %r1 = xor i32 %r0, %a0          ; reg: x ^ (x + 1)
  %m1 = xor i32 %m0, %a1          ; mem: x ^ (x + 1)
  %res = add i32 %r1, %m1
  ret i32 %res
}

; TBM BLCMSK, 64-bit: x ^ (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcmskq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcmskq %rdi, %rcx
; BDVER3-NEXT: blcmskq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcmskq %rdi, %rcx
; BDVER4-NEXT: blcmskq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = add i64 %a0, 1
  %m0 = add i64 %a1, 1
  %r1 = xor i64 %r0, %a0          ; reg: x ^ (x + 1)
  %m1 = xor i64 %m0, %a1          ; mem: x ^ (x + 1)
  %res = add i64 %r1, %m1
  ret i64 %res
}

; TBM BLCS, 32-bit: x | (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcsl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcs_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blcsl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcs_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcsl %edi, %ecx
; BDVER3-NEXT: blcsl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcs_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcsl %edi, %ecx
; BDVER4-NEXT: blcsl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = add i32 %a0, 1
  %m0 = add i32 %a1, 1
  %r1 = or i32 %r0, %a0           ; reg: x | (x + 1)
  %m1 = or i32 %m0, %a1           ; mem: x | (x + 1)
  %res = add i32 %r1, %m1
  ret i32 %res
}

; TBM BLCS, 64-bit: x | (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blcsq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blcs_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcs_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcsq %rdi, %rcx
; BDVER3-NEXT: blcsq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcs_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcsq %rdi, %rcx
; BDVER4-NEXT: blcsq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = add i64 %a0, 1
  %m0 = add i64 %a1, 1
  %r1 = or i64 %r0, %a0           ; reg: x | (x + 1)
  %m1 = or i64 %m0, %a1           ; mem: x | (x + 1)
  %res = add i64 %r1, %m1
  ret i64 %res
}

; TBM BLSFILL, 32-bit: x | (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blsfilll`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blsfill_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsfill_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsfilll %edi, %ecx
; BDVER3-NEXT: blsfilll (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsfill_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsfilll %edi, %ecx
; BDVER4-NEXT: blsfilll (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = add i32 %a0, -1
  %m0 = add i32 %a1, -1
  %r1 = or i32 %r0, %a0           ; reg: x | (x - 1)
  %m1 = or i32 %m0, %a1           ; mem: x | (x - 1)
  %res = add i32 %r1, %m1
  ret i32 %res
}

; TBM BLSFILL, 64-bit: x | (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blsfillq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blsfill_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsfill_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsfillq %rdi, %rcx
; BDVER3-NEXT: blsfillq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsfill_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsfillq %rdi, %rcx
; BDVER4-NEXT: blsfillq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = add i64 %a0, -1
  %m0 = add i64 %a1, -1
  %r1 = or i64 %r0, %a0           ; reg: x | (x - 1)
  %m1 = or i64 %m0, %a1           ; mem: x | (x - 1)
  %res = add i64 %r1, %m1
  ret i64 %res
}

; TBM BLSIC, 32-bit: ~x | (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blsicl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blsic_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: blsicl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsic_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsicl %edi, %ecx
; BDVER3-NEXT: blsicl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsic_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsicl %edi, %ecx
; BDVER4-NEXT: blsicl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = xor i32 %a0, -1           ; reg: ~x
  %m0 = xor i32 %a1, -1           ; mem: ~x
  %r1 = add i32 %a0, -1
  %m1 = add i32 %a1, -1
  %r2 = or i32 %r0, %r1           ; reg: ~x | (x - 1)
  %m2 = or i32 %m0, %m1           ; mem: ~x | (x - 1)
  %res = add i32 %r2, %m2
  ret i32 %res
}

; TBM BLSIC, 64-bit: ~x | (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `blsicq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_blsic_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsic_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsicq %rdi, %rcx
; BDVER3-NEXT: blsicq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsic_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsicq %rdi, %rcx
; BDVER4-NEXT: blsicq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = xor i64 %a0, -1           ; reg: ~x
  %m0 = xor i64 %a1, -1           ; mem: ~x
  %r1 = add i64 %a0, -1
  %m1 = add i64 %a1, -1
  %r2 = or i64 %r0, %r1           ; reg: ~x | (x - 1)
  %m2 = or i64 %m0, %m1           ; mem: ~x | (x - 1)
  %res = add i64 %r2, %m2
  ret i64 %res
}

; TBM T1MSKC, 32-bit: ~x | (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `t1mskcl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: t1mskcl %edi, %ecx
; BDVER3-NEXT: t1mskcl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: t1mskcl %edi, %ecx
; BDVER4-NEXT: t1mskcl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = xor i32 %a0, -1           ; reg: ~x
  %m0 = xor i32 %a1, -1           ; mem: ~x
  %r1 = add i32 %a0, 1
  %m1 = add i32 %a1, 1
  %r2 = or i32 %r0, %r1           ; reg: ~x | (x + 1)
  %m2 = or i32 %m0, %m1           ; mem: ~x | (x + 1)
  %res = add i32 %r2, %m2
  ret i32 %res
}

; TBM T1MSKC, 64-bit: ~x | (x + 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `t1mskcq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: t1mskcq %rdi, %rcx
; BDVER3-NEXT: t1mskcq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: t1mskcq %rdi, %rcx
; BDVER4-NEXT: t1mskcq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = xor i64 %a0, -1           ; reg: ~x
  %m0 = xor i64 %a1, -1           ; mem: ~x
  %r1 = add i64 %a0, 1
  %m1 = add i64 %a1, 1
  %r2 = or i64 %r0, %r1           ; reg: ~x | (x + 1)
  %m2 = or i64 %m0, %m1           ; mem: ~x | (x + 1)
  %res = add i64 %r2, %m2
  ret i64 %res
}

; TBM TZMSK, 32-bit: ~x & (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `tzmskl`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [2:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: tzmskl %edi, %ecx
; BDVER3-NEXT: tzmskl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: tzmskl %edi, %ecx
; BDVER4-NEXT: tzmskl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
  %a1 = load i32, i32* %p1        ; memory operand
  %r0 = xor i32 %a0, -1           ; reg: ~x
  %m0 = xor i32 %a1, -1           ; mem: ~x
  %r1 = add i32 %a0, -1
  %m1 = add i32 %a1, -1
  %r2 = and i32 %r0, %r1          ; reg: ~x & (x - 1)
  %m2 = and i32 %m0, %m1          ; mem: ~x & (x - 1)
  %res = add i32 %r2, %m2
  ret i32 %res
}

; TBM TZMSK, 64-bit: ~x & (x - 1), computed on the register argument %a0 and
; on the value loaded from %p1, so the checks cover both the register and the
; memory-operand form of `tzmskq`. The two results are added to form the
; return value.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [2:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BDVER3-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: tzmskq %rdi, %rcx
; BDVER3-NEXT: tzmskq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: tzmskq %rdi, %rcx
; BDVER4-NEXT: tzmskq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
  %a1 = load i64, i64* %p1        ; memory operand
  %r0 = xor i64 %a0, -1           ; reg: ~x
  %m0 = xor i64 %a1, -1           ; mem: ~x
  %r1 = add i64 %a0, -1
  %m1 = add i64 %a1, -1
  %r2 = and i64 %r0, %r1          ; reg: ~x & (x - 1)
  %m2 = and i64 %m0, %m1          ; mem: ~x & (x - 1)
  %res = add i64 %r2, %m2
  ret i64 %res
}