[X86] Go through some tests that still reference old intrinsics that have been autoupgraded and replace them with the upgraded IR.
This mostly covers the stack folding tests and is by no means a thorough audit. llvm-svn: 335204
parent d1dab0c3c0
commit 76df3d61a3
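
For reference: LLVM's AutoUpgrade layer rewrites calls to these retired x86 intrinsics into plain IR while a module is being parsed, which is why tests that still called the old intrinsics kept passing; this change just writes the post-upgrade IR into the tests directly. Below is a minimal sketch of the pattern, using the scalar SSE2 add as the example. The function names @old_form and @upgraded_form are illustrative, not from the commit, and round-tripping the old IR through opt -S is one way to see the upgraded form, though the exact workflow used for this commit is an assumption.

; Before: the retired intrinsic, kept alive only by AutoUpgrade.
define <2 x double> @old_form(<2 x double> %a0, <2 x double> %a1) {
  %r = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)

; After: the IR AutoUpgrade produces - do the scalar op on element 0,
; then reinsert the result into the first source vector.
define <2 x double> @upgraded_form(<2 x double> %a0, <2 x double> %a1) {
  %lhs = extractelement <2 x double> %a0, i32 0
  %rhs = extractelement <2 x double> %a1, i32 0
  %sum = fadd double %lhs, %rhs
  %res = insertelement <2 x double> %a0, double %sum, i32 0
  ret <2 x double> %res
}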
@@ -951,11 +951,15 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
 ; CHECK-NEXT: kmovw %esi, %k1
 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
-  call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
-  ret void
+  %1 = and i8 %mask, 1
+  %2 = bitcast i8* %ptr to <4 x float>*
+  %3 = bitcast i8 %1 to <8 x i1>
+  %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data, <4 x float>* %2, i32 1, <4 x i1> %extract)
+  ret void
 }
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) #1

 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )

 declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
 declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)

@@ -52,8 +52,11 @@ define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_addsd_int
 ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fadd double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

@@ -69,8 +72,11 @@ define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_addss_int
 ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fadd float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

@@ -399,10 +405,10 @@ define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2pd_int
 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
-  ret <2 x double> %2
+  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
+  %cvt = sitofp <2 x i32> %2 to <2 x double>
+  ret <2 x double> %cvt
 }
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

 define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
@@ -416,10 +422,9 @@ define <4 x double> @stack_fold_cvtdq2pd_ymm_int(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm_int
 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
-  ret <4 x double> %2
+  %cvt = sitofp <4 x i32> %a0 to <4 x double>
+  ret <4 x double> %cvt
 }
-declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone

 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2ps
@@ -520,10 +525,10 @@ define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
 ;CHECK-LABEL: stack_fold_cvtps2pd_int
 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
-  ret <2 x double> %2
+  %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
+  %cvtps2pd = fpext <2 x float> %2 to <2 x double>
+  ret <2 x double> %cvtps2pd
 }
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

 define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm
@@ -537,10 +542,9 @@ define <4 x double> @stack_fold_cvtps2pd_ymm_int(<4 x float> %a0) {
 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm_int
 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
-  ret <4 x double> %2
+  %cvtps2pd = fpext <4 x float> %a0 to <4 x double>
+  ret <4 x double> %cvtps2pd
 }
-declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone

 define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
 ;CHECK-LABEL: stack_fold_cvtps2ph_ymm
@@ -587,10 +591,10 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi2sd_int
 ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
-  ret <2 x double> %2
+  %2 = sitofp i32 %a0 to double
+  %3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
+  ret <2 x double> %3
 }
-declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

 ; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
 define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
@@ -606,10 +610,10 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi642sd_int
 ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
-  ret <2 x double> %2
+  %2 = sitofp i64 %a0 to double
+  %3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
+  ret <2 x double> %3
 }
-declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

 ; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
 define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
@@ -625,10 +629,10 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi2ss_int
 ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
-  ret <4 x float> %2
+  %2 = sitofp i32 %a0 to float
+  %3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
+  ret <4 x float> %3
 }
-declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

 ; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
 define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
@@ -644,10 +648,10 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi642ss_int
 ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
-  ret <4 x float> %2
+  %2 = sitofp i64 %a0 to float
+  %3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
+  ret <4 x float> %3
 }
-declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

 ; TODO stack_fold_cvtss2si

@@ -816,10 +820,12 @@ define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divsd_int
 ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fdiv double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_divss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_divss
@@ -833,10 +839,12 @@ define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_divss_int
 ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fdiv float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_dppd
@@ -1317,10 +1325,12 @@ define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_mulsd_int
 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fmul double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_mulss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_mulss
@@ -1334,10 +1344,12 @@ define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_mulss_int
 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fmul float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone

 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_orpd
@@ -1629,37 +1641,37 @@ define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
 ;CHECK-LABEL: stack_fold_sqrtpd
 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
+  %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
   ret <2 x double> %2
 }
-declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

 define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
 ;CHECK-LABEL: stack_fold_sqrtpd_ymm
 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
+  %2 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0)
   ret <4 x double> %2
 }
-declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
 ;CHECK-LABEL: stack_fold_sqrtps
 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
+  %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
   ret <4 x float> %2
 }
-declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

 define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
 ;CHECK-LABEL: stack_fold_sqrtps_ymm
 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
+  %2 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0)
   ret <8 x float> %2
 }
-declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)

 define double @stack_fold_sqrtsd(double %a0) optsize {
 ;CHECK-LABEL: stack_fold_sqrtsd
@@ -1727,10 +1739,12 @@ define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_subsd_int
 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fsub double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_subss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_subss
@@ -1744,10 +1758,12 @@ define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_subss_int
 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fsub float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

 define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_testpd

@@ -624,27 +624,29 @@ define <16 x float> @stack_fold_vpermt2ps(<16 x float> %x0, <16 x i32> %x1, <16
 ;CHECK-LABEL: stack_fold_vpermt2ps
 ;CHECK: vpermt2ps {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
-  ret <16 x float> %res
+  %2 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
+  ret <16 x float> %2
 }
-declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>)

 define <16 x float> @stack_fold_vpermi2ps(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2) {
 ;CHECK-LABEL: stack_fold_vpermi2ps
 ;CHECK: vpermi2ps {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
-  ret <16 x float> %res
+  %2 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
+  ret <16 x float> %2
 }
-declare <16 x float> @llvm.x86.avx512.mask.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

 define <16 x float> @stack_fold_vpermi2ps_mask(<16 x float> %x0, <16 x i32>* %x1, <16 x float> %x2, i16 %mask) {
 ;CHECK-LABEL: stack_fold_vpermi2ps_mask
 ;CHECK: vpermi2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
   %x1b = load <16 x i32>, <16 x i32>* %x1
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1b, <16 x float> %x2, i16 %mask)
-  ret <16 x float> %res
+  %2 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1b, <16 x float> %x2)
+  %3 = bitcast <16 x i32> %x1b to <16 x float>
+  %4 = bitcast i16 %mask to <16 x i1>
+  %5 = select <16 x i1> %4, <16 x float> %2, <16 x float> %3
+  ret <16 x float> %5
 }

 define <16 x float> @stack_fold_vpermt2ps_mask(<16 x i32>* %x0, <16 x float> %x1, <16 x float> %x2, i16 %mask) {
@@ -652,8 +654,10 @@ define <16 x float> @stack_fold_vpermt2ps_mask(<16 x i32>* %x0, <16 x float> %x1
 ;CHECK: vpermt2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
   %x0b = load <16 x i32>, <16 x i32>* %x0
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt2var.ps.512(<16 x i32> %x0b, <16 x float> %x1, <16 x float> %x2, i16 %mask)
-  ret <16 x float> %res
+  %2 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0b, <16 x float> %x2)
+  %3 = bitcast i16 %mask to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x1
+  ret <16 x float> %4
 }

 define <16 x float> @stack_fold_vpermt2ps_maskz(<16 x i32>* %x0, <16 x float> %x1, <16 x float> %x2, i16 %mask) {
@@ -661,28 +665,29 @@ define <16 x float> @stack_fold_vpermt2ps_maskz(<16 x i32>* %x0, <16 x float> %x
 ;CHECK: vpermt2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
   %x0b = load <16 x i32>, <16 x i32>* %x0
-  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0b, <16 x float> %x1, <16 x float> %x2, i16 %mask)
-  ret <16 x float> %res
+  %2 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0b, <16 x float> %x2)
+  %3 = bitcast i16 %mask to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %4
 }
-declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

 define <8 x double> @stack_fold_vpermt2pd(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
 ;CHECK-LABEL: stack_fold_vpermt2pd
 ;CHECK: vpermt2pd {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
-  ret <8 x double> %res
+  %2 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
+  %3 = bitcast <8 x i64> %x1 to <8 x double>
+  ret <8 x double> %2
 }
-declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>)

 define <8 x double> @stack_fold_vpermi2pd(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2) {
 ;CHECK-LABEL: stack_fold_vpermi2pd
 ;CHECK: vpermi2pd {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x double> @llvm.x86.avx512.mask.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
-  ret <8 x double> %res
+  %2 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2)
+  ret <8 x double> %2
 }
-declare <8 x double> @llvm.x86.avx512.mask.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

 define <8 x double> @stack_fold_permpd(<8 x double> %a0) {
 ;CHECK-LABEL: stack_fold_permpd
@@ -772,16 +777,16 @@ define <8 x double> @stack_fold_permilpdvar_zmm(<8 x double> %a0, <8 x i64> %a1)
 ;CHECK-LABEL: stack_fold_permilpdvar_zmm
 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1, <8 x double> undef, i8 -1)
+  %2 = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1)
   ret <8 x double> %2
 }
-declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>) nounwind readnone

 define <8 x double> @stack_fold_permilpdvar_zmm_mask(<8 x double>* %passthru, <8 x double> %a0, <8 x i64> %a1, i8 %mask) {
 ;CHECK-LABEL: stack_fold_permilpdvar_zmm_mask
 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1, <8 x double> undef, i8 -1)
+  %2 = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1)
   %3 = bitcast i8 %mask to <8 x i1>
   ; load needed to keep the operation from being scheduled above the asm block
   %4 = load <8 x double>, <8 x double>* %passthru
@@ -793,7 +798,7 @@ define <8 x double> @stack_fold_permilpdvar_zmm_maskz(<8 x double> %a0, <8 x i64
 ;CHECK-LABEL: stack_fold_permilpdvar_zmm_maskz
 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1, <8 x double> undef, i8 -1)
+  %2 = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %a1)
   %3 = bitcast i8 %mask to <8 x i1>
   %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
   ret <8 x double> %4
@@ -833,16 +838,16 @@ define <16 x float> @stack_fold_permilpsvar_zmm(<16 x float> %a0, <16 x i32> %a1
 ;CHECK-LABEL: stack_fold_permilpsvar_zmm
 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1, <16 x float> undef, i16 -1)
+  %2 = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
   ret <16 x float> %2
 }
-declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>) nounwind readnone

 define <16 x float> @stack_fold_permilpsvar_zmm_mask(<16 x float>* %passthru, <16 x float> %a0, <16 x i32> %a1, i16 %mask) {
 ;CHECK-LABEL: stack_fold_permilpsvar_zmm_mask
 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1, <16 x float> undef, i16 -1)
+  %2 = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
   %3 = bitcast i16 %mask to <16 x i1>
   ; load needed to keep the operation from being scheduled above the asm block
   %4 = load <16 x float>, <16 x float>* %passthru
@@ -854,7 +859,7 @@ define <16 x float> @stack_fold_permilpsvar_zmm_maskz(<16 x float> %a0, <16 x i3
 ;CHECK-LABEL: stack_fold_permilpsvar_zmm_maskz
 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1, <16 x float> undef, i16 -1)
+  %2 = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
   %3 = bitcast i16 %mask to <16 x i1>
   %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
   ret <16 x float> %4

@@ -36,10 +36,12 @@ define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_addsd_int
 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fadd double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_addss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_addss
@@ -53,10 +55,12 @@ define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_addss_int
 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fadd float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_addsubpd
@@ -257,10 +261,10 @@ define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2pd_int
 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
-  ret <2 x double> %2
+  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
+  %cvt = sitofp <2 x i32> %2 to <2 x double>
+  ret <2 x double> %cvt
 }
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
 ;CHECK-LABEL: stack_fold_cvtdq2ps
@@ -309,10 +313,10 @@ define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
 ;CHECK-LABEL: stack_fold_cvtps2pd_int
 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
-  ret <2 x double> %2
+  %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
+  %cvtps2pd = fpext <2 x float> %2 to <2 x double>
+  ret <2 x double> %cvtps2pd
 }
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

 ; TODO stack_fold_cvtsd2si

@@ -365,10 +369,10 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
 ;CHECK-LABEL: stack_fold_cvtsi2sd_int
 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %b0, i32 %a0)
-  ret <2 x double> %2
+  %2 = sitofp i32 %a0 to double
+  %3 = insertelement <2 x double> %b0, double %2, i64 0
+  ret <2 x double> %3
 }
-declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

 define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi642sd
@@ -382,10 +386,10 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
 ;CHECK-LABEL: stack_fold_cvtsi642sd_int
 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %b0, i64 %a0)
-  ret <2 x double> %2
+  %2 = sitofp i64 %a0 to double
+  %3 = insertelement <2 x double> %b0, double %2, i64 0
+  ret <2 x double> %3
 }
-declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

 define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
 ;CHECK-LABEL: stack_fold_cvtsi2ss
@@ -399,10 +403,10 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
 ;CHECK-LABEL: stack_fold_cvtsi2ss_int
 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %b0, i32 %a0)
-  ret <4 x float> %2
+  %2 = sitofp i32 %a0 to float
+  %3 = insertelement <4 x float> %b0, float %2, i64 0
+  ret <4 x float> %3
 }
-declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

 define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtsi642ss
@@ -416,10 +420,10 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) {
 ;CHECK-LABEL: stack_fold_cvtsi642ss_int
 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
-  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %b0, i64 %a0)
-  ret <4 x float> %2
+  %2 = sitofp i64 %a0 to float
+  %3 = insertelement <4 x float> %b0, float %2, i64 0
+  ret <4 x float> %3
 }
-declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

 define double @stack_fold_cvtss2sd(float %a0) minsize {
 ;CHECK-LABEL: stack_fold_cvtss2sd
@@ -433,10 +437,11 @@ define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
 ;CHECK-LABEL: stack_fold_cvtss2sd_int
 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
-  ret <2 x double> %2
+  %2 = extractelement <4 x float> %a0, i64 0
+  %3 = fpext float %2 to double
+  %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0
+  ret <2 x double> %4
 }
-declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

 ; TODO stack_fold_cvtss2si

@@ -573,8 +578,11 @@ define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divsd_int
 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fdiv double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

@@ -590,8 +598,11 @@ define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_divss_int
 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fdiv float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

@@ -904,10 +915,12 @@ define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_mulsd_int
 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fmul double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_mulss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_mulss
@@ -921,10 +934,12 @@ define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_mulss_int
 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fmul float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone

 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_orpd
@@ -1123,12 +1138,13 @@ define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) opts
 ;CHECK-LABEL: stack_fold_sqrtss_int
 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a1)
-  %3 = extractelement <4 x float> %2, i32 0
-  %4 = insertelement <4 x float> %a0, float %3, i32 0
-  ret <4 x float> %4
+  %2 = extractelement <4 x float> %a1, i64 0
+  %3 = call float @llvm.sqrt.f32(float %2)
+  %4 = insertelement <4 x float> %a1, float %3, i64 0
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = insertelement <4 x float> %a0, float %5, i32 0
+  ret <4 x float> %6
 }
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)

 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_subpd
@@ -1158,10 +1174,12 @@ define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_subsd_int
 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
-  ret <2 x double> %2
+  %2 = extractelement <2 x double> %a0, i32 0
+  %3 = extractelement <2 x double> %a1, i32 0
+  %4 = fsub double %2, %3
+  %5 = insertelement <2 x double> %a0, double %4, i32 0
+  ret <2 x double> %5
 }
-declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

 define float @stack_fold_subss(float %a0, float %a1) {
 ;CHECK-LABEL: stack_fold_subss
@@ -1175,10 +1193,12 @@ define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_subss_int
 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %2
+  %2 = extractelement <4 x float> %a0, i32 0
+  %3 = extractelement <4 x float> %a1, i32 0
+  %4 = fsub float %2, %3
+  %5 = insertelement <4 x float> %a0, float %4, i32 0
+  ret <4 x float> %5
 }
-declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
 ;CHECK-LABEL: stack_fold_ucomisd

@ -115,28 +115,31 @@ define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
|||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
|
||||
ret <16 x i8> %2
|
||||
%2 = icmp sgt <16 x i8> %a0, zeroinitializer
|
||||
%3 = sub <16 x i8> zeroinitializer, %a0
|
||||
%4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
|
||||
ret <16 x i8> %4
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsd
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
|
||||
ret <4 x i32> %2
|
||||
%2 = icmp sgt <4 x i32> %a0, zeroinitializer
|
||||
%3 = sub <4 x i32> zeroinitializer, %a0
|
||||
%4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsw
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
|
||||
ret <8 x i16> %2
|
||||
%2 = icmp sgt <8 x i16> %a0, zeroinitializer
|
||||
%3 = sub <8 x i16> zeroinitializer, %a0
|
||||
%4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
|
||||
ret <8 x i16> %4
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_packssdw
|
||||
|
@ -310,10 +313,9 @@ define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
|||
;CHECK-LABEL: stack_fold_pblendw
|
||||
;CHECK: vpblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
|
||||
%2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pclmulqdq
|
||||
|
@ -573,107 +575,108 @@ define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|||
;CHECK-LABEL: stack_fold_pmaxsb
|
||||
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
%2 = icmp sgt <16 x i8> %a0, %a1
|
||||
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsd
|
||||
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
%2 = icmp sgt <4 x i32> %a0, %a1
|
||||
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsw
|
||||
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
%2 = icmp sgt <8 x i16> %a0, %a1
|
||||
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp ugt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp ugt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxuw
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp ugt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminsb
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp slt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminsd
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp slt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminsw
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp slt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminub
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp ult <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminud
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp ult <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp ult <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

@@ -681,10 +684,15 @@ define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = shl <2 x i64> %2, <i64 32, i64 32>
%5 = ashr <2 x i64> %4, <i64 32, i64 32>
%6 = shl <2 x i64> %3, <i64 32, i64 32>
%7 = ashr <2 x i64> %6, <i64 32, i64 32>
%8 = mul <2 x i64> %5, %7
ret <2 x i64> %8
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
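; pmuldq multiplies the sign-extended low 32 bits of each 64-bit lane, so the
; upgraded IR models that extension with a shl/ashr-by-32 pair before the
; 64-bit multiply. A standalone sketch (@pmuldq_lane_sketch is illustrative,
; not part of the suite):
define <2 x i64> @pmuldq_lane_sketch(<2 x i64> %x, <2 x i64> %y) {
  ; shl then ashr by 32 sign-extends bits 0..31 of each lane
  %xs = shl <2 x i64> %x, <i64 32, i64 32>
  %xe = ashr <2 x i64> %xs, <i64 32, i64 32>
  %ys = shl <2 x i64> %y, <i64 32, i64 32>
  %ye = ashr <2 x i64> %ys, <i64 32, i64 32>
  %p = mul <2 x i64> %xe, %ye
  ret <2 x i64> %p
}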

define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmulhrsw

@@ -733,10 +741,13 @@ define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
%5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
%6 = mul <2 x i64> %4, %5
ret <2 x i64> %6
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
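; The unsigned pmuludq zero-extends instead, so the upgrade masks each lane
; with 0xFFFFFFFF rather than shifting. Sketch (@pmuludq_lane_sketch is
; illustrative):
define <2 x i64> @pmuludq_lane_sketch(<2 x i64> %x, <2 x i64> %y) {
  ; and with 4294967295 keeps only the low 32 bits of each lane
  %xl = and <2 x i64> %x, <i64 4294967295, i64 4294967295>
  %yl = and <2 x i64> %y, <i64 4294967295, i64 4294967295>
  %p = mul <2 x i64> %xl, %yl
  ret <2 x i64> %p
}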

define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_por

@@ -71,28 +71,31 @@ define <32 x i8> @stack_fold_pabsb(<32 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
ret <32 x i8> %2
%2 = icmp sgt <32 x i8> %a0, zeroinitializer
%3 = sub <32 x i8> zeroinitializer, %a0
%4 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %3
ret <32 x i8> %4
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
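; With no generic absolute-value intrinsic available to target here, the pabs
; upgrade open-codes abs as compare-against-zero, negate, select; note this
; preserves the pabs behaviour of mapping INT_MIN to itself. Sketch
; (@pabs_v32i8_sketch is illustrative):
define <32 x i8> @pabs_v32i8_sketch(<32 x i8> %a) {
  %pos = icmp sgt <32 x i8> %a, zeroinitializer
  %neg = sub <32 x i8> zeroinitializer, %a
  %abs = select <32 x i1> %pos, <32 x i8> %a, <32 x i8> %neg
  ret <32 x i8> %abs
}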

define <8 x i32> @stack_fold_pabsd(<8 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
ret <8 x i32> %2
%2 = icmp sgt <8 x i32> %a0, zeroinitializer
%3 = sub <8 x i32> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %3
ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pabsw(<16 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
ret <16 x i16> %2
%2 = icmp sgt <16 x i16> %a0, zeroinitializer
%3 = sub <16 x i16> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %3
ret <16 x i16> %4
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone

define <16 x i16> @stack_fold_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_packssdw

@@ -286,10 +289,9 @@ define <16 x i16> @stack_fold_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pblendw
;CHECK: vpblendw $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7)
%2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
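; A pblendw immediate is a per-element bit mask repeated for each 128-bit
; lane, so the upgrade can always express it as a shufflevector: with $7
; (0b00000111), elements 0-2 of each lane come from the second source, which
; is why the mask above uses indices 16-18 and 24-26. Sketch
; (@blendw7_sketch is illustrative):
define <16 x i16> @blendw7_sketch(<16 x i16> %a, <16 x i16> %b) {
  ; indices >= 16 select from %b; 3..7 and 11..15 stay in %a
  %r = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %r
}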

define <16 x i8> @stack_fold_pbroadcastb(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastb

@@ -561,109 +563,109 @@ define <32 x i8> @stack_fold_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxsb
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp sgt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxsd
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp sgt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pmaxsw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxsw
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp sgt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @stack_fold_pmaxub(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp ugt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp ugt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxuw
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp ugt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @stack_fold_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminsb
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp slt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminsd
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp slt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pminsw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminsw
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp slt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @stack_fold_pminub(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminub
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp ult <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_pminud(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminud
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp ult <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp ult <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd

@@ -771,10 +773,15 @@ define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
ret <4 x i64> %2
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = shl <4 x i64> %2, <i64 32, i64 32, i64 32, i64 32>
%5 = ashr <4 x i64> %4, <i64 32, i64 32, i64 32, i64 32>
%6 = shl <4 x i64> %3, <i64 32, i64 32, i64 32, i64 32>
%7 = ashr <4 x i64> %6, <i64 32, i64 32, i64 32, i64 32>
%8 = mul <4 x i64> %5, %7
ret <4 x i64> %8
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmulhrsw

@@ -823,10 +830,13 @@ define <4 x i64> @stack_fold_pmuludq(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
ret <4 x i64> %2
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = and <4 x i64> %2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%5 = and <4 x i64> %3, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%6 = mul <4 x i64> %4, %5
ret <4 x i64> %6
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @stack_fold_por(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_por

@@ -218,8 +218,10 @@ define <64 x i8> @stack_fold_pabsb(<64 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> undef, i64 -1)
ret <64 x i8> %2
%2 = icmp sgt <64 x i8> %a0, zeroinitializer
%3 = sub <64 x i8> zeroinitializer, %a0
%4 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %3
ret <64 x i8> %4
}
declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) nounwind readnone

@@ -227,145 +229,184 @@ define <64 x i8> @stack_fold_pabsb_mask(<64 x i8> %passthru, <64 x i8> %a0, i64
;CHECK-LABEL: stack_fold_pabsb_mask
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask)
ret <64 x i8> %2
%2 = icmp sgt <64 x i8> %a0, zeroinitializer
%3 = sub <64 x i8> zeroinitializer, %a0
%4 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %3
%5 = bitcast i64 %mask to <64 x i1>
%6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> %passthru
ret <64 x i8> %6
}
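; The AVX-512 masked forms upgrade to the unmasked computation followed by a
; generic masking step: the scalar mask is bitcast to a vector of i1 and used
; in a select against the passthru operand (or zeroinitializer for the {z}
; variants). Sketch (@apply_mask_sketch is illustrative):
define <64 x i8> @apply_mask_sketch(<64 x i8> %res, <64 x i8> %passthru, i64 %mask) {
  ; bit i of %mask becomes element i of the <64 x i1> predicate
  %m = bitcast i64 %mask to <64 x i1>
  %r = select <64 x i1> %m, <64 x i8> %res, <64 x i8> %passthru
  ret <64 x i8> %r
}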

define <64 x i8> @stack_fold_pabsb_maskz(<64 x i8> %a0, i64 %mask) {
;CHECK-LABEL: stack_fold_pabsb_maskz
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %2
%2 = icmp sgt <64 x i8> %a0, zeroinitializer
%3 = sub <64 x i8> zeroinitializer, %a0
%4 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %3
%5 = bitcast i64 %mask to <64 x i1>
%6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
ret <64 x i8> %6
}

define <16 x i32> @stack_fold_pabsd(<16 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> undef, i16 -1)
ret <16 x i32> %2
%2 = icmp sgt <16 x i32> %a0, zeroinitializer
%3 = sub <16 x i32> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %3
ret <16 x i32> %4
}
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) nounwind readnone

define <16 x i32> @stack_fold_pabsd_mask(<16 x i32> %passthru, <16 x i32> %a0, i16 %mask) {
;CHECK-LABEL: stack_fold_pabsd_mask
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %2
%2 = icmp sgt <16 x i32> %a0, zeroinitializer
%3 = sub <16 x i32> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %3
%5 = bitcast i16 %mask to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> %passthru
ret <16 x i32> %6
}

define <16 x i32> @stack_fold_pabsd_maskz(<16 x i32> %a0, i16 %mask) {
;CHECK-LABEL: stack_fold_pabsd_maskz
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %2
%2 = icmp sgt <16 x i32> %a0, zeroinitializer
%3 = sub <16 x i32> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %3
%5 = bitcast i16 %mask to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
ret <16 x i32> %6
}

define <8 x i64> @stack_fold_pabsq(<8 x i64> %a0) {
;CHECK-LABEL: stack_fold_pabsq
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> undef, i8 -1)
ret <8 x i64> %2
%2 = icmp sgt <8 x i64> %a0, zeroinitializer
%3 = sub <8 x i64> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %3
ret <8 x i64> %4
}
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) nounwind readnone

define <8 x i64> @stack_fold_pabsq_mask(<8 x i64> %passthru, <8 x i64> %a0, i8 %mask) {
;CHECK-LABEL: stack_fold_pabsq_mask
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %2
%2 = icmp sgt <8 x i64> %a0, zeroinitializer
%3 = sub <8 x i64> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %3
%5 = bitcast i8 %mask to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %passthru
ret <8 x i64> %6
}

define <8 x i64> @stack_fold_pabsq_maskz(<8 x i64> %a0, i8 %mask) {
;CHECK-LABEL: stack_fold_pabsq_maskz
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %2
%2 = icmp sgt <8 x i64> %a0, zeroinitializer
%3 = sub <8 x i64> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %3
%5 = bitcast i8 %mask to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
ret <8 x i64> %6
}

define <32 x i16> @stack_fold_pabsw(<32 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> undef, i32 -1)
ret <32 x i16> %2
%2 = icmp sgt <32 x i16> %a0, zeroinitializer
%3 = sub <32 x i16> zeroinitializer, %a0
%4 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %3
ret <32 x i16> %4
}
declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) nounwind readnone

define <32 x i16> @stack_fold_pabsw_mask(<32 x i16> %passthru, <32 x i16> %a0, i32 %mask) {
;CHECK-LABEL: stack_fold_pabsw_mask
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask)
ret <32 x i16> %2
%2 = icmp sgt <32 x i16> %a0, zeroinitializer
%3 = sub <32 x i16> zeroinitializer, %a0
%4 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %3
%5 = bitcast i32 %mask to <32 x i1>
%6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> %passthru
ret <32 x i16> %6
}

define <32 x i16> @stack_fold_pabsw_maskz(<32 x i16> %a0, i32 %mask) {
;CHECK-LABEL: stack_fold_pabsw_maskz
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %2
%2 = icmp sgt <32 x i16> %a0, zeroinitializer
%3 = sub <32 x i16> zeroinitializer, %a0
%4 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %3
%5 = bitcast i32 %mask to <32 x i1>
%6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
ret <32 x i16> %6
}

define <32 x i16> @stack_fold_packssdw(<16 x i32> %a0, <16 x i32> %a1) {
;CHECK-LABEL: stack_fold_packssdw
;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> undef, i32 -1)
%2 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a0, <16 x i32> %a1)
ret <32 x i16> %2
}
declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
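; Saturating packs have no generic-IR equivalent, so for these the upgrade
; only splits the masked intrinsic into its unmasked counterpart (declared
; above) plus the usual bitcast+select masking. Sketch
; (@packssdw_masked_sketch is illustrative):
define <32 x i16> @packssdw_masked_sketch(<16 x i32> %a, <16 x i32> %b, <32 x i16> %pt, i32 %mask) {
  %p = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %m = bitcast i32 %mask to <32 x i1>
  %r = select <32 x i1> %m, <32 x i16> %p, <32 x i16> %pt
  ret <32 x i16> %r
}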

define <64 x i8> @stack_fold_packsswb(<32 x i16> %a0, <32 x i16> %a1) {
;CHECK-LABEL: stack_fold_packsswb
;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a0, <32 x i16> %a1, <64 x i8> undef, i64 -1)
%2 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a0, <32 x i16> %a1)
ret <64 x i8> %2
}
declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone

define <32 x i16> @stack_fold_packusdw(<16 x i32> %a0, <16 x i32> %a1) {
;CHECK-LABEL: stack_fold_packusdw
;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> undef, i32 -1)
%2 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a0, <16 x i32> %a1)
ret <32 x i16> %2
}
declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone

define <32 x i16> @stack_fold_packusdw_mask(<32 x i16>* %passthru, <16 x i32> %a0, <16 x i32> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_packusdw_mask
;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <32 x i16>, <32 x i16>* %passthru
%3 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> %2, i32 %mask)
ret <32 x i16> %3
%3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a0, <16 x i32> %a1)
%4 = bitcast i32 %mask to <32 x i1>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %2
ret <32 x i16> %5
}

define <32 x i16> @stack_fold_packusdw_maskz(<16 x i32> %a0, <16 x i32> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_packusdw_maskz
;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a0, <16 x i32> %a1, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %2
%2 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a0, <16 x i32> %a1)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
ret <32 x i16> %4
}

define <64 x i8> @stack_fold_packuswb(<32 x i16> %a0, <32 x i16> %a1) {
;CHECK-LABEL: stack_fold_packuswb
;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a0, <32 x i16> %a1, <64 x i8> undef, i64 -1)
%2 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a0, <32 x i16> %a1)
ret <64 x i8> %2
}
declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone

define <64 x i8> @stack_fold_paddb(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_paddb

@@ -641,37 +682,33 @@ define <64 x i8> @stack_fold_vpermi2b(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x
;CHECK-LABEL: stack_fold_vpermi2b
;CHECK: vpermi2b {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
ret <64 x i8> %res
%2 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
ret <64 x i8> %2
}
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
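; Both the masked vpermi2var and vpermt2var intrinsics funnel into the single
; unmasked vpermi2var intrinsic, whose middle operand is the index vector --
; hence vpermt2var(%x0, %x1, %x2, -1) above becomes vpermi2var(%x1, %x0, %x2)
; with the first two operands swapped, while the vpermt2 tests further down
; keep their operand order. Which of vpermi2b/vpermt2b is selected then falls
; out of register allocation. Sketch against the qi.512 declaration that
; appears later in this file (@vpermi2_order_sketch is illustrative):
define <64 x i8> @vpermi2_order_sketch(<64 x i8> %idx, <64 x i8> %t0, <64 x i8> %t1) {
  ; the index travels in the middle operand of the unmasked intrinsic
  %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %t0, <64 x i8> %idx, <64 x i8> %t1)
  ret <64 x i8> %r
}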

define <16 x i32> @stack_fold_vpermi2d(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermi2d
;CHECK: vpermi2d {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
%2 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
ret <16 x i32> %2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @stack_fold_vpermi2q(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermi2q
;CHECK: vpermi2q {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
%2 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
ret <8 x i64> %2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <32 x i16> @stack_fold_vpermi2w(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermi2w
;CHECK: vpermi2w {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
ret <32 x i16> %res
%2 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
ret <32 x i16> %2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <8 x i64> @stack_fold_permq(<8 x i64> %a0) {
;CHECK-LABEL: stack_fold_permq

@@ -736,37 +773,37 @@ define <64 x i8> @stack_fold_vpermt2b(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x
;CHECK-LABEL: stack_fold_vpermt2b
;CHECK: vpermt2b {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
ret <64 x i8> %res
%2 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2)
ret <64 x i8> %2
}
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
declare <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>)

define <16 x i32> @stack_fold_vpermt2d(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermt2d
;CHECK: vpermt2d {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
%2 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
ret <16 x i32> %2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @stack_fold_vpermt2q(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermt2q
;CHECK: vpermt2q {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
%2 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
ret <8 x i64> %2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <32 x i16> @stack_fold_vpermt2w(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermt2w
;CHECK: vpermt2w {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
ret <32 x i16> %res
%2 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
ret <32 x i16> %2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)

define <32 x i16> @stack_fold_permwvar(<32 x i16> %a0, <32 x i16> %a1) {
;CHECK-LABEL: stack_fold_permwvar

@@ -873,16 +910,16 @@ define <32 x i16> @stack_fold_pmaddubsw_zmm(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaddubsw_zmm
;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1, <32 x i16> undef, i32 -1)
%2 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1)
ret <32 x i16> %2
}
declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) nounwind readnone

define <32 x i16> @stack_fold_pmaddubsw_zmm_mask(<32 x i16>* %passthru, <64 x i8> %a0, <64 x i8> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_pmaddubsw_zmm_mask
;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1, <32 x i16> undef, i32 -1)
%2 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1)
%3 = bitcast i32 %mask to <32 x i1>
; load needed to keep the operation from being scheduled above the asm block
%4 = load <32 x i16>, <32 x i16>* %passthru

@@ -894,7 +931,7 @@ define <32 x i16> @stack_fold_pmaddubsw_zmm_maskz(<64 x i8> %a0, <64 x i8> %a1,
;CHECK-LABEL: stack_fold_pmaddubsw_zmm_maskz
;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1, <32 x i16> undef, i32 -1)
%2 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> %a1)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
ret <32 x i16> %4

@@ -904,16 +941,16 @@ define <16 x i32> @stack_fold_pmaddwd_zmm(<32 x i16> %a0, <32 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaddwd_zmm
;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> undef, i16 -1)
%2 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1)
ret <16 x i32> %2
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) nounwind readnone
declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) nounwind readnone

define <16 x i32> @stack_fold_pmaddwd_zmm_mask(<16 x i32>* %passthru, <32 x i16> %a0, <32 x i16> %a1, i16 %mask) {
;CHECK-LABEL: stack_fold_pmaddwd_zmm_mask
;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> undef, i16 -1)
%2 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1)
%3 = bitcast i16 %mask to <16 x i1>
; load needed to keep the operation from being scheduled above the asm block
|
||||
%4 = load <16 x i32>, <16 x i32>* %passthru
|
||||
|
@ -925,7 +962,7 @@ define <16 x i32> @stack_fold_pmaddwd_zmm_maskz(<16 x i32>* %passthru, <32 x i16
|
|||
;CHECK-LABEL: stack_fold_pmaddwd_zmm_maskz
|
||||
;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> undef, i16 -1)
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> %a1)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
|
||||
ret <16 x i32> %4
|
||||
|
@ -1245,26 +1282,30 @@ define <64 x i8> @stack_fold_pshufb_zmm(<64 x i8> %a0, <64 x i8> %a1) {
|
|||
;CHECK-LABEL: stack_fold_pshufb_zmm
|
||||
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> undef, i64 -1)
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
|
||||
|
||||
define <64 x i8> @stack_fold_pshufb_zmm_mask(<64 x i8>* %passthru, <64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pshufb_zmm_mask
|
||||
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = load <64 x i8>, <64 x i8>* %passthru
|
||||
%3 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %2, i64 %mask)
|
||||
ret <64 x i8> %3
|
||||
%3 = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1)
|
||||
%4 = bitcast i64 %mask to <64 x i1>
|
||||
%5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> %2
|
||||
ret <64 x i8> %5
|
||||
}
|
||||
|
||||
define <64 x i8> @stack_fold_pshufb_zmm_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pshufb_zmm_maskz
|
||||
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %2
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %a0, <64 x i8> %a1)
|
||||
%3 = bitcast i64 %mask to <64 x i1>
|
||||
%4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
|
||||
ret <64 x i8> %4
|
||||
}

define <16 x i32> @stack_fold_pshufd_zmm(<16 x i32> %a0) {

@@ -177,73 +177,81 @@ define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
ret <16 x i8> %2
%2 = icmp sgt <16 x i8> %a0, zeroinitializer
%3 = sub <16 x i8> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
ret <16 x i8> %4
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

define <32 x i8> @stack_fold_pabsb_ymm(<32 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb_ymm
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
ret <32 x i8> %2
%2 = icmp sgt <32 x i8> %a0, zeroinitializer
%3 = sub <32 x i8> zeroinitializer, %a0
%4 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %3
ret <32 x i8> %4
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
ret <4 x i32> %2
%2 = icmp sgt <4 x i32> %a0, zeroinitializer
%3 = sub <4 x i32> zeroinitializer, %a0
%4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
ret <4 x i32> %4
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_pabsd_ymm(<8 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd_ymm
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
ret <8 x i32> %2
%2 = icmp sgt <8 x i32> %a0, zeroinitializer
%3 = sub <8 x i32> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %3
ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_pabsq(<2 x i64> %a0) {
;CHECK-LABEL: stack_fold_pabsq
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %a0, <2 x i64> undef, i8 -1)
ret <2 x i64> %2
%2 = icmp sgt <2 x i64> %a0, zeroinitializer
%3 = sub <2 x i64> zeroinitializer, %a0
%4 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %3
ret <2 x i64> %4
}
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <4 x i64> @stack_fold_pabsq_ymm(<4 x i64> %a0) {
;CHECK-LABEL: stack_fold_pabsq_ymm
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %a0, <4 x i64> undef, i8 -1)
ret <4 x i64> %2
%2 = icmp sgt <4 x i64> %a0, zeroinitializer
%3 = sub <4 x i64> zeroinitializer, %a0
%4 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %3
ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) nounwind readnone

define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
ret <8 x i16> %2
%2 = icmp sgt <8 x i16> %a0, zeroinitializer
%3 = sub <8 x i16> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
ret <8 x i16> %4
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

define <16 x i16> @stack_fold_pabsw_ymm(<16 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw_ymm
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
ret <16 x i16> %2
%2 = icmp sgt <16 x i16> %a0, zeroinitializer
%3 = sub <16 x i16> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %3
ret <16 x i16> %4
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
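
All of the pabs tests follow one compare/negate/select idiom, which also preserves the instruction's INT_MIN behavior: 0 - INT_MIN wraps back to INT_MIN, exactly what vpabs* produces. A minimal sketch for a hypothetical <4 x i32> %v:

%pos = icmp sgt <4 x i32> %v, zeroinitializer ; strictly positive lanes keep %v
%neg = sub <4 x i32> zeroinitializer, %v      ; negate (wraps for INT_MIN)
%abs = select <4 x i1> %pos, <4 x i32> %v, <4 x i32> %neg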

define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_packssdw

@@ -590,73 +598,65 @@ define <16 x i8> @stack_fold_vpermi2b(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x
;CHECK-LABEL: stack_fold_vpermi2b
;CHECK: vpermi2b {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
ret <16 x i8> %res
%2 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8> @stack_fold_vpermi2b_ymm(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
;CHECK-LABEL: stack_fold_vpermi2b_ymm
;CHECK: vpermi2b {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
ret <32 x i8> %res
%2 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <4 x i32> @stack_fold_vpermi2d(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermi2d
;CHECK: vpermi2d {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
ret <4 x i32> %res
%2 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @stack_fold_vpermi2d_ymm(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermi2d_ymm
;CHECK: vpermi2d {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
ret <8 x i32> %res
%2 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <2 x i64> @stack_fold_vpermi2q(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermi2q
;CHECK: vpermi2q {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
ret <2 x i64> %res
%2 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <4 x i64> @stack_fold_vpermi2q_ymm(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermi2q_ymm
;CHECK: vpermi2q {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
ret <4 x i64> %res
%2 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <8 x i16> @stack_fold_vpermi2w(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermi2w
;CHECK: vpermi2w {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
ret <8 x i16> %res
%2 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @stack_fold_vpermi2w_ymm(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermi2w_ymm
;CHECK: vpermi2w {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
ret <16 x i16> %res
%2 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
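
One detail worth noting in the vpermi2 tests above: the upgraded, unmasked @llvm.x86.avx512.vpermi2var.* intrinsic appears to take the index vector as its second operand, selecting each lane from the concatenation of the two data operands. The old mask.vpermt2var calls passed the indices first, which is why the upgraded calls swap %x0 and %x1; the vpermt2 tests further down keep their operand order. A sketch with hypothetical names, under that assumption:

; result[i] = concat(%a, %b)[ %idx[i] ] for each lane
%r = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %a, <4 x i32> %idx, <4 x i32> %b)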

define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
;CHECK-LABEL: stack_fold_permq

@@ -683,73 +683,73 @@ define <16 x i8> @stack_fold_vpermt2b(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x
;CHECK-LABEL: stack_fold_vpermt2b
;CHECK: vpermt2b {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
ret <16 x i8> %res
%2 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)

define <32 x i8> @stack_fold_vpermt2b_ymm(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
;CHECK-LABEL: stack_fold_vpermt2b_ymm
;CHECK: vpermt2b {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
ret <32 x i8> %res
%2 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)

define <4 x i32> @stack_fold_vpermt2d(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermt2d
;CHECK: vpermt2d {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
ret <4 x i32> %res
%2 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
declare <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <8 x i32> @stack_fold_vpermt2d_ymm(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
;CHECK-LABEL: stack_fold_vpermt2d_ymm
;CHECK: vpermt2d {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
ret <8 x i32> %res
%2 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <2 x i64> @stack_fold_vpermt2q(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermt2q
;CHECK: vpermt2q {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
ret <2 x i64> %res
%2 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)

define <4 x i64> @stack_fold_vpermt2q_ymm(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
;CHECK-LABEL: stack_fold_vpermt2q_ymm
;CHECK: vpermt2q {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
ret <4 x i64> %res
%2 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <8 x i16> @stack_fold_vpermt2w(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermt2w
;CHECK: vpermt2w {{-?[0-9]*}}(%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
ret <8 x i16> %res
%2 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)

define <16 x i16> @stack_fold_vpermt2w_ymm(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
;CHECK-LABEL: stack_fold_vpermt2w_ymm
;CHECK: vpermt2w {{-?[0-9]*}}(%rsp), %ymm1, %ymm0 # 32-byte Folded Reload
%1 = tail call <4 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
ret <16 x i16> %res
%2 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)

define <16 x i16> @stack_fold_permwvar(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_permwvar

@@ -928,177 +928,194 @@ define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxsb
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp sgt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
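
Every pmax/pmin test in this file reduces to the same two-instruction idiom: an icmp with the appropriate signed or unsigned predicate followed by a select. A minimal sketch for a signed byte max over hypothetical %a and %b:

%gt = icmp sgt <16 x i8> %a, %b
%max = select <16 x i1> %gt, <16 x i8> %a, <16 x i8> %b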

define <32 x i8> @stack_fold_pmaxsb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxsb_ymm
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp sgt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxsd
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp sgt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_pmaxsd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxsd_ymm
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp sgt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_pmaxsq(<2 x i64> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_pmaxsq
;CHECK: vpmaxsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
ret <2 x i64> %2
%2 = icmp sgt <2 x i64> %a0, %a1
%3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %a1
ret <2 x i64> %3
}
declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone

define <4 x i64> @stack_fold_pmaxsq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
;CHECK-LABEL: stack_fold_pmaxsq_ymm
;CHECK: vpmaxsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
ret <4 x i64> %2
%2 = icmp sgt <4 x i64> %a0, %a1
%3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %a1
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone

define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxsw
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp sgt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i16> @stack_fold_pmaxsw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxsw_ymm
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp sgt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp ugt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <32 x i8> @stack_fold_pmaxub_ymm(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub_ymm
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
%2 = icmp ugt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp ugt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_pmaxud_ymm(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud_ymm
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
%2 = icmp ugt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_pmaxuq(<2 x i64> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_pmaxuq
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
ret <2 x i64> %2
%2 = icmp ugt <2 x i64> %a0, %a1
%3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %a1
ret <2 x i64> %3
}
declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone

define <2 x i64> @stack_fold_pmaxuq_mask(<2 x i64>* %passthru, <2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmaxuq_mask
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <2 x i64>, <2 x i64>* %passthru
%3 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %2, i8 %mask)
ret <2 x i64> %3
%3 = icmp ugt <2 x i64> %a0, %a1
%4 = select <2 x i1> %3, <2 x i64> %a0, <2 x i64> %a1
%5 = bitcast i8 %mask to <8 x i1>
%extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
%6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> %2
ret <2 x i64> %6
}

define <2 x i64> @stack_fold_pmaxuq_maskz(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmaxuq_maskz
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %2
%2 = icmp ugt <2 x i64> %a0, %a1
%3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %a1
%4 = bitcast i8 %mask to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
%5 = select <2 x i1> %extract, <2 x i64> %3, <2 x i64> zeroinitializer
ret <2 x i64> %5
}

define <4 x i64> @stack_fold_pmaxuq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
;CHECK-LABEL: stack_fold_pmaxuq_ymm
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
ret <4 x i64> %2
%2 = icmp ugt <4 x i64> %a0, %a1
%3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %a1
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone

define <4 x i64> @stack_fold_pmaxuq_ymm_mask(<4 x i64>* %passthru, <4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmaxuq_ymm_mask
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <4 x i64>, <4 x i64>* %passthru
%3 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %2, i8 %mask)
ret <4 x i64> %3
%3 = icmp ugt <4 x i64> %a0, %a1
%4 = select <4 x i1> %3, <4 x i64> %a0, <4 x i64> %a1
%5 = bitcast i8 %mask to <8 x i1>
%extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> %2
ret <4 x i64> %6
}

define <4 x i64> @stack_fold_pmaxuq_ymm_maskz(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmaxuq_ymm_maskz
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %2
%2 = icmp ugt <4 x i64> %a0, %a1
%3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %a1
%4 = bitcast i8 %mask to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%5 = select <4 x i1> %extract, <4 x i64> %3, <4 x i64> zeroinitializer
ret <4 x i64> %5
}
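
The masked variants above add one more wrinkle: the i8 mask covers eight potential lanes, but the 128-bit and 256-bit ops only use two or four, so the bitcast <8 x i1> is narrowed with a shufflevector before feeding the final select. A sketch of the merge-masked form for hypothetical <2 x i64> values %a, %b, %passthru and an i8 mask %m:

%gt = icmp ugt <2 x i64> %a, %b
%max = select <2 x i1> %gt, <2 x i64> %a, <2 x i64> %b
%m.v = bitcast i8 %m to <8 x i1>
%m.lo = shufflevector <8 x i1> %m.v, <8 x i1> %m.v, <2 x i32> <i32 0, i32 1> ; keep mask bits 0-1
%res = select <2 x i1> %m.lo, <2 x i64> %max, <2 x i64> %passthru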
|
||||
|
||||
define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuw
|
||||
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
%2 = icmp ugt <8 x i16> %a0, %a1
|
||||
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @stack_fold_pmaxuw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuw_ymm
|
||||
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
%2 = icmp ugt <16 x i16> %a0, %a1
|
||||
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
|
||||
ret <16 x i16> %3
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
@ -1106,145 +1123,145 @@ define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|||
;CHECK-LABEL: stack_fold_pminsb
|
||||
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
%2 = icmp slt <16 x i8> %a0, %a1
|
||||
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <32 x i8> @stack_fold_pminsb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsb_ymm
|
||||
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
%2 = icmp slt <32 x i8> %a0, %a1
|
||||
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
|
||||
ret <32 x i8> %3
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsd
|
||||
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
%2 = icmp slt <4 x i32> %a0, %a1
|
||||
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i32> @stack_fold_pminsd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsd_ymm
|
||||
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
%2 = icmp slt <8 x i32> %a0, %a1
|
||||
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
|
||||
ret <8 x i32> %3
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pminsq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsq
|
||||
;CHECK: vpminsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
%2 = icmp slt <2 x i64> %a0, %a1
|
||||
%3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %a1
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x i64> @stack_fold_pminsq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsq_ymm
|
||||
;CHECK: vpminsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
%2 = icmp slt <4 x i64> %a0, %a1
|
||||
%3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %a1
|
||||
ret <4 x i64> %3
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsw
|
||||
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
%2 = icmp slt <8 x i16> %a0, %a1
|
||||
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @stack_fold_pminsw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsw_ymm
|
||||
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
%2 = icmp slt <16 x i16> %a0, %a1
|
||||
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
|
||||
ret <16 x i16> %3
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminub
|
||||
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
%2 = icmp ult <16 x i8> %a0, %a1
|
||||
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <32 x i8> @stack_fold_pminub_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminub_ymm
|
||||
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
%2 = icmp ult <32 x i8> %a0, %a1
|
||||
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
|
||||
ret <32 x i8> %3
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminud
|
||||
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
%2 = icmp ult <4 x i32> %a0, %a1
|
||||
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i32> @stack_fold_pminud_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminud_ymm
|
||||
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
%2 = icmp ult <8 x i32> %a0, %a1
|
||||
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
|
||||
ret <8 x i32> %3
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pminuq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminuq
|
||||
;CHECK: vpminuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
%2 = icmp ult <2 x i64> %a0, %a1
|
||||
%3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %a1
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone

define <4 x i64> @stack_fold_pminuq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
;CHECK-LABEL: stack_fold_pminuq_ymm
;CHECK: vpminuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
ret <4 x i64> %2
%2 = icmp ult <4 x i64> %a0, %a1
%3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %a1
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone

define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp ult <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i16> @stack_fold_pminuw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw_ymm
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
%2 = icmp ult <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i16> @stack_fold_vpmovdw(<8 x i32> %a0) {
;CHECK-LABEL: stack_fold_vpmovdw

@ -1565,59 +1582,83 @@ define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = shl <2 x i64> %2, <i64 32, i64 32>
%5 = ashr <2 x i64> %4, <i64 32, i64 32>
%6 = shl <2 x i64> %3, <i64 32, i64 32>
%7 = ashr <2 x i64> %6, <i64 32, i64 32>
%8 = mul <2 x i64> %5, %7
ret <2 x i64> %8
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
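; Note: pmuldq multiplies the sign-extended low 32 bits of each 64-bit lane;
; the shl-by-32 / ashr-by-32 pairs above perform that sign extension before
; the 64-bit mul.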

define <4 x i64> @stack_fold_pmuldq_ymm(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq_ymm
;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
ret <4 x i64> %2
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = shl <4 x i64> %2, <i64 32, i64 32, i64 32, i64 32>
%5 = ashr <4 x i64> %4, <i64 32, i64 32, i64 32, i64 32>
%6 = shl <4 x i64> %3, <i64 32, i64 32, i64 32, i64 32>
%7 = ashr <4 x i64> %6, <i64 32, i64 32, i64 32, i64 32>
%8 = mul <4 x i64> %5, %7
ret <4 x i64> %8
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
%5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
%6 = mul <2 x i64> %4, %5
ret <2 x i64> %6
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
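; Note: pmuludq multiplies the zero-extended low 32 bits of each 64-bit lane;
; the and with 4294967295 (0xFFFFFFFF) above clears the high halves before
; the 64-bit mul.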

define <4 x i64> @stack_fold_pmuludq_ymm(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq_ymm
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
ret <4 x i64> %2
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = and <4 x i64> %2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%5 = and <4 x i64> %3, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%6 = mul <4 x i64> %4, %5
ret <4 x i64> %6
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @stack_fold_pmuludq_ymm_mask(<4 x i64>* %passthru, <8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmuludq_ymm_mask
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
%3 = bitcast i8 %mask to <8 x i1>
%4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%5 = load <4 x i64>, <4 x i64>* %passthru
%6 = select <4 x i1> %4, <4 x i64> %2, <4 x i64> %5
ret <4 x i64> %6
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = and <4 x i64> %2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%5 = and <4 x i64> %3, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%6 = mul <4 x i64> %4, %5
%7 = bitcast i8 %mask to <8 x i1>
%8 = shufflevector <8 x i1> %7, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%9 = load <4 x i64>, <4 x i64>* %passthru
%10 = select <4 x i1> %8, <4 x i64> %6, <4 x i64> %9
ret <4 x i64> %10
}
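; Note: in the masked form the i8 mask is bitcast to <8 x i1>, the low four
; bits are extracted with a shufflevector, and a select merges the product
; with the loaded passthru value (or zeroinitializer in the maskz variant
; below).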

define <4 x i64> @stack_fold_pmuludq_ymm_maskz(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_pmuludq_ymm_maskz
;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
%3 = bitcast i8 %mask to <8 x i1>
%4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%5 = select <4 x i1> %4, <4 x i64> %2, <4 x i64> zeroinitializer
ret <4 x i64> %5
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = and <4 x i64> %2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%5 = and <4 x i64> %3, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%6 = mul <4 x i64> %4, %5
%7 = bitcast i8 %mask to <8 x i1>
%8 = shufflevector <8 x i1> %7, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%9 = select <4 x i1> %8, <4 x i64> %6, <4 x i64> zeroinitializer
ret <4 x i64> %9
}

define <4 x i32> @stack_fold_vpopcntd(<4 x i32> %a0) {

@ -1678,52 +1719,60 @@ define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pshufb
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> undef, i16 -1)
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) nounwind readnone
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
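; Note: the unmasked AVX-512 pshufb intrinsic maps directly onto the plain
; SSSE3/AVX2 pshufb intrinsic; the masked and maskz variants below add a
; bitcast of the integer mask plus a select.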

define <16 x i8> @stack_fold_pshufb_mask(<16 x i8>* %passthru, <16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
;CHECK-LABEL: stack_fold_pshufb_mask
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <16 x i8>, <16 x i8>* %passthru
%3 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %2, i16 %mask)
ret <16 x i8> %3
%3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
%4 = bitcast i16 %mask to <16 x i1>
%5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %2
ret <16 x i8> %5
}

define <16 x i8> @stack_fold_pshufb_maskz(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
;CHECK-LABEL: stack_fold_pshufb_maskz
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %2
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
%3 = bitcast i16 %mask to <16 x i1>
%4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer
ret <16 x i8> %4
}

define <32 x i8> @stack_fold_pshufb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pshufb_ymm
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> undef, i32 -1)
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

define <32 x i8> @stack_fold_pshufb_ymm_mask(<32 x i8>* %passthru, <32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_pshufb_ymm_mask
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <32 x i8>, <32 x i8>* %passthru
%3 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %2, i32 %mask)
ret <32 x i8> %3
%3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
%4 = bitcast i32 %mask to <32 x i1>
%5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %2
ret <32 x i8> %5
}

define <32 x i8> @stack_fold_pshufb_ymm_maskz(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_pshufb_ymm_maskz
;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %2
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer
ret <32 x i8> %4
}

define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {

@ -151,28 +151,31 @@ define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb
;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
ret <16 x i8> %2
%2 = icmp sgt <16 x i8> %a0, zeroinitializer
%3 = sub <16 x i8> zeroinitializer, %a0
%4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
ret <16 x i8> %4
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
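; Note: absolute value is expanded as a compare against zero (icmp sgt), a
; negation (sub from zero), and a select of the non-negative lane.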

define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd
;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
ret <4 x i32> %2
%2 = icmp sgt <4 x i32> %a0, zeroinitializer
%3 = sub <4 x i32> zeroinitializer, %a0
%4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
ret <4 x i32> %4
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw
;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
ret <8 x i16> %2
%2 = icmp sgt <8 x i16> %a0, zeroinitializer
%3 = sub <8 x i16> zeroinitializer, %a0
%4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
ret <8 x i16> %4
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_packssdw

@ -346,10 +349,9 @@ define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pblendw
;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
%2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
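; Note: pblendw with immediate 7 (0b00000111) takes the low three elements
; from %a1 (shuffle indices 8-10) and the remaining five from %a0, so the
; blend becomes a single shufflevector.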

define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_pclmulqdq

@ -622,109 +624,109 @@ define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxsb
;CHECK: pmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp sgt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
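; Note: the max/min tests that follow all use the same expansion: an icmp
; with the matching signed/unsigned predicate feeding a select.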

define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxsd
;CHECK: pmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp sgt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxsw
;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp sgt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub
;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp ugt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud
;CHECK: pmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp ugt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxuw
;CHECK: pmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp ugt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminsb
;CHECK: pminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp slt <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminsd
;CHECK: pminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp slt <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminsw
;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp slt <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminub
;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %2
%2 = icmp ult <16 x i8> %a0, %a1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminud
;CHECK: pminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
%2 = icmp ult <4 x i32> %a0, %a1
%3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw
;CHECK: pminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %2
%2 = icmp ult <8 x i16> %a0, %a1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd

@ -838,10 +840,15 @@ define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = shl <2 x i64> %2, <i64 32, i64 32>
%5 = ashr <2 x i64> %4, <i64 32, i64 32>
%6 = shl <2 x i64> %3, <i64 32, i64 32>
%7 = ashr <2 x i64> %6, <i64 32, i64 32>
%8 = mul <2 x i64> %5, %7
ret <2 x i64> %8
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmulhrsw

@ -890,10 +897,13 @@ define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq
;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
ret <2 x i64> %2
%2 = bitcast <4 x i32> %a0 to <2 x i64>
%3 = bitcast <4 x i32> %a1 to <2 x i64>
%4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
%5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
%6 = mul <2 x i64> %4, %5
ret <2 x i64> %6
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_por