; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+aes,+pclmul < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
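;
; As a minimal sketch of the pattern used by every test below (this mirrors
; @stack_fold_aesdec and is illustrative only, not an additional test case):
; the inline-asm nop clobbers xmm2-xmm15, so the register allocator must spill
; %a1 to a stack slot, and the CHECK lines then verify that the reload of that
; slot is folded into the memory operand of the instruction under test:
;
;   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},...,~{xmm15},~{flags}"()
;   %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
;   ret <2 x i64> %2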

define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesdec:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaesdec {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesdeclast:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaesdeclast {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesenc:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaesenc {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesenclast:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaesenclast {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_aesimc:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaesimc {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_aeskeygenassist:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaeskeygenassist $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

define <4 x i32> @stack_fold_movd_load(i32 %a0) {
; CHECK-LABEL: stack_fold_movd_load:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

define i32 @stack_fold_movd_store(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_movd_store:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  ; add forces execution domain
  %1 = add <4 x i32> %a0, %a1
  %2 = extractelement <4 x i32> %1, i32 0
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_movq_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}

define i64 @stack_fold_movq_store(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_movq_store:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  ; add forces execution domain
  %1 = add <2 x i64> %a0, %a1
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %2
}

define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_mpsadbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmpsadbw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pabsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, zeroinitializer
  %3 = sub <16 x i8> zeroinitializer, %a0
  %4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
  ret <16 x i8> %4
}

define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pabsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, zeroinitializer
  %3 = sub <4 x i32> zeroinitializer, %a0
  %4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
  ret <4 x i32> %4
}

define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pabsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, zeroinitializer
  %3 = sub <8 x i16> zeroinitializer, %a0
  %4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
  ret <8 x i16> %4
}

define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_packssdw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackssdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_packsswb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpacksswb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_packusdw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackusdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_packuswb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackuswb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <16 x i8> %a0, %a1
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_paddd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_paddq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <2 x i64> %a0, %a1
  ret <2 x i64> %2
}

define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddusb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddusb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddusw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddusw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_palignr:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpalignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i8> %2
}

define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pand:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpand {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = and <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}

define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pandn:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <16 x i8> %2, %a1
  ; add forces execution domain
  %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %4
}

define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pavgb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpavgb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <16 x i8> %a0 to <16 x i16>
  %3 = zext <16 x i8> %a1 to <16 x i16>
  %4 = add <16 x i16> %2, %3
  %5 = add <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %6 = lshr <16 x i16> %5, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %7 = trunc <16 x i16> %6 to <16 x i8>
  ret <16 x i8> %7
}

define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pavgw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpavgw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <8 x i16> %a0 to <8 x i32>
  %3 = zext <8 x i16> %a1 to <8 x i32>
  %4 = add <8 x i32> %2, %3
  %5 = add <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = lshr <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %7 = trunc <8 x i32> %6 to <8 x i16>
  ret <8 x i16> %7
}

define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
; CHECK-LABEL: stack_fold_pblendvb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendvb %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pblendw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pclmulqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpclmulqdq $0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpestri:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpestrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestrm $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpistri:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpistri $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpistrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpistrm $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; TODO stack_fold_pextrb

define i32 @stack_fold_pextrd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pextrd:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpextrd $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  ; add forces execution domain
  %1 = add <4 x i32> %a0, %a1
  %2 = extractelement <4 x i32> %1, i32 1
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define i64 @stack_fold_pextrq(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_pextrq:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: vpextrq $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = extractelement <2 x i64> %a0, i32 1
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %1
}

; TODO stack_fold_pextrw

define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_phaddd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
|
2019-08-26 04:48:14 +08:00
|
|
|
; CHECK-LABEL: stack_fold_phminposuw:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: #APP
|
|
|
|
; CHECK-NEXT: nop
|
|
|
|
; CHECK-NEXT: #NO_APP
|
|
|
|
; CHECK-NEXT: vphminposuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: retq
|
2015-01-22 07:43:30 +08:00
|
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
|
|
%2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
|
|
|
|
ret <8 x i16> %2
|
|
|
|
}
|
2015-01-21 07:54:17 +08:00
|
|
|
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
|
|
|
|
|
|
|
|
define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
|
2019-08-26 04:48:14 +08:00
|
|
|
; CHECK-LABEL: stack_fold_phsubd:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: #APP
|
|
|
|
; CHECK-NEXT: nop
|
|
|
|
; CHECK-NEXT: #NO_APP
|
|
|
|
; CHECK-NEXT: vphsubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: retq
|
2015-01-21 07:54:17 +08:00
|
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
|
|
%2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
|
|
|
|
|
|
|
|
define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphsubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphsubw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
; CHECK-LABEL: stack_fold_pinsrb:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpinsrb $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
ret <16 x i8> %2
}

define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_pinsrd:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpinsrd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
ret <4 x i32> %2
}

define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_pinsrq:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpinsrq $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
ret <2 x i64> %2
}

define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
; CHECK-LABEL: stack_fold_pinsrw:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpinsrw $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
ret <8 x i16> %2
}

define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaddubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaddwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}

define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}

define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxub:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxub {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxud:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxud {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}

define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}

define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}

define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}

define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminub:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminub {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminud:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminud {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}

define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}

define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuldq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmuldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = shl <2 x i64> %2, <i64 32, i64 32>
%5 = ashr <2 x i64> %4, <i64 32, i64 32>
%6 = shl <2 x i64> %3, <i64 32, i64 32>
%7 = ashr <2 x i64> %6, <i64 32, i64 32>
%8 = mul <2 x i64> %5, %7
ret <2 x i64> %8
}

define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhrsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmulld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = mul <4 x i32> %a0, %a1
ret <4 x i32> %2
}

define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmullw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmullw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = mul <8 x i16> %a0, %a1
ret <8 x i16> %2
}

define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuludq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmuludq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
%5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
%6 = mul <2 x i64> %4, %5
ret <2 x i64> %6
}

define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_por:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpor {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = or <16 x i8> %a0, %a1
; add forces execution domain
%3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %3
}

define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psadbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsadbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pshufb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pshufd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufd $27, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,2,1,0]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; add forces execution domain
%3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %3
}

define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pshufhw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufhw $11, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0,1,2,3,7,6,4,4]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
ret <8 x i16> %2
}

define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pshuflw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshuflw $27, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,2,1,0,4,5,6,7]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %2
}

define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psignb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psignd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psignw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pslld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpslld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psllq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psllw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrad:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrad {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psraw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsraw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psrlq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psrlw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sub <16 x i8> %a0, %a1
ret <16 x i8> %2
}

define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psubd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sub <4 x i32> %a0, %a1
ret <4 x i32> %2
}

define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psubq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sub <2 x i64> %a0, %a1
ret <2 x i64> %2
}

define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubusb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubusb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubusw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubusw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sub <8 x i16> %a0, %a1
ret <8 x i16> %2
}

define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_ptest:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vptest {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @stack_fold_ptest_ymm(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_ptest_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vptest {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone

define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpckhbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <16 x i8> %2
}

define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckhdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; add forces execution domain
%3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %3
}

define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpckhqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[1],mem[1]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
; add forces execution domain
%3 = add <2 x i64> %2, <i64 1, i64 1>
ret <2 x i64> %3
}

define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpckhwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %2
}

define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpcklbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
ret <16 x i8> %2
}

define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckldq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; add forces execution domain
%3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %3
}

define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpcklqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
; add forces execution domain
%3 = add <2 x i64> %2, <i64 1, i64 1>
ret <2 x i64> %3
}

define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpcklwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %2
}

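; pxor also has FP-domain twins (vxorps/vxorpd), so this test adds 1 to every byte to
; keep the result in the integer domain; the constant is materialized as vpcmpeqd
; (all-ones) followed by vpsubb in the expected output below.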
define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pxor:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpxor {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = xor <16 x i8> %a0, %a1
; add forces execution domain
%3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %3
}