llvm-project/llvm/test/CodeGen/X86/stack-folding-tbm.ll

923 lines
32 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
define i32 @stack_fold_bextri_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_bextri_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: bextrl $3841, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: # imm = 0xF01
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
ret i32 %2
}
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)
define i64 @stack_fold_bextri_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_bextri_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: bextrq $3841, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: # imm = 0xF01
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
ret i64 %2
}
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)
define i32 @stack_fold_blcfill_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcfill_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = and i32 %a0, %2
ret i32 %3
}
define i64 @stack_fold_blcfill_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcfill_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = and i64 %a0, %2
ret i64 %3
}
define i32 @stack_fold_blci_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blci_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = xor i32 %2, -1
%4 = or i32 %a0, %3
ret i32 %4
}
define i64 @stack_fold_blci_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blci_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blciq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = xor i64 %2, -1
%4 = or i64 %a0, %3
ret i64 %4
}
define i32 @stack_fold_blcic_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcic_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = xor i32 %a0, -1
%4 = and i32 %2, %3
ret i32 %4
}
define i64 @stack_fold_blcic_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcic_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = xor i64 %a0, -1
%4 = and i64 %2, %3
ret i64 %4
}
define i32 @stack_fold_blcmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcmsk_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = xor i32 %a0, %2
ret i32 %3
}
define i64 @stack_fold_blcmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcmsk_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = xor i64 %a0, %2
ret i64 %3
}
define i32 @stack_fold_blcs_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcs_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcsl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = or i32 %a0, %2
ret i32 %3
}
define i64 @stack_fold_blcs_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcs_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blcsq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = or i64 %a0, %2
ret i64 %3
}
define i32 @stack_fold_blsfill_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsfill_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i32 %a0, 1
%3 = or i32 %a0, %2
ret i32 %3
}
define i64 @stack_fold_blsfill_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsfill_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i64 %a0, 1
%3 = or i64 %a0, %2
ret i64 %3
}
define i32 @stack_fold_blsic_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsic_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i32 %a0, 1
%3 = xor i32 %a0, -1
%4 = or i32 %2, %3
ret i32 %4
}
define i64 @stack_fold_blsic_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsic_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i64 %a0, 1
%3 = xor i64 %a0, -1
%4 = or i64 %2, %3
ret i64 %4
}
define i32 @stack_fold_t1mskc_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_t1mskc_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: t1mskcl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i32 %a0, 1
%3 = xor i32 %a0, -1
%4 = or i32 %2, %3
ret i32 %4
}
define i64 @stack_fold_t1mskc_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_t1mskc_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: t1mskcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = add i64 %a0, 1
%3 = xor i64 %a0, -1
%4 = or i64 %2, %3
ret i64 %4
}
define i32 @stack_fold_tzmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_tzmsk_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: tzmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i32 %a0, 1
%3 = xor i32 %a0, -1
%4 = and i32 %2, %3
ret i32 %4
}
define i64 @stack_fold_tzmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_tzmsk_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: tzmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sub i64 %a0, 1
%3 = xor i64 %a0, -1
%4 = and i64 %2, %3
ret i64 %4
}