forked from OSchip/llvm-project
[X86] Correct the patterns for VSQRTSSr_Int, VSQRTSDr_Int, VRCPSSr_Int, and VRSQRTSSr_Int so that they do not use an IMPLICIT_DEF for the first input. The semantics of these intrinsics are well defined, not undefined.
Each intrinsic takes one argument: the lower bits are affected by the operation and the upper bits should be passed through. The instruction itself takes two operands: the high bits of the first operand are passed through, and the low bits of the second operand are modified by the operation. To match the instruction to the intrinsic, we should pass the single intrinsic input to both operands. I had to remove the stack folding tests for these instructions since they depended on the incorrect behavior: the same register is now used for both inputs, so the load can't be folded. llvm-svn: 288779
This commit is contained in:
parent
4facc13108
commit
5fc7bc91f9
|
@ -3481,7 +3481,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||||
}
|
}
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(Intr VR128:$src),
|
def : Pat<(Intr VR128:$src),
|
||||||
(!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
|
(!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src,
|
||||||
VR128:$src)>;
|
VR128:$src)>;
|
||||||
}
|
}
|
||||||
let Predicates = [HasAVX, OptForSize] in {
|
let Predicates = [HasAVX, OptForSize] in {
|
||||||
|
|
|
@ -1504,15 +1504,7 @@ define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) {
|
||||||
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
; TODO stack_fold_rcpss
|
; TODO stack_fold_rcpss
|
||||||
|
; TODO stack_fold_rcpss_int
|
||||||
define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) {
|
|
||||||
;CHECK-LABEL: stack_fold_rcpss_int
|
|
||||||
;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
||||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
||||||
%2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
|
|
||||||
ret <4 x float> %2
|
|
||||||
}
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
|
||||||
|
|
||||||
define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
|
define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
|
||||||
;CHECK-LABEL: stack_fold_roundpd
|
;CHECK-LABEL: stack_fold_roundpd
|
||||||
|
@ -1609,15 +1601,7 @@ define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
|
||||||
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
; TODO stack_fold_rsqrtss
|
; TODO stack_fold_rsqrtss
|
||||||
|
; TODO stack_fold_rsqrtss_int
|
||||||
define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0) {
|
|
||||||
;CHECK-LABEL: stack_fold_rsqrtss_int
|
|
||||||
;CHECK: vrsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
||||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
||||||
%2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
|
|
||||||
ret <4 x float> %2
|
|
||||||
}
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
|
||||||
|
|
||||||
define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
;CHECK-LABEL: stack_fold_shufpd
|
;CHECK-LABEL: stack_fold_shufpd
|
||||||
|
@ -1696,14 +1680,7 @@ define double @stack_fold_sqrtsd(double %a0) {
|
||||||
}
|
}
|
||||||
declare double @llvm.sqrt.f64(double) nounwind readnone
|
declare double @llvm.sqrt.f64(double) nounwind readnone
|
||||||
|
|
||||||
define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) {
|
; TODO stack_fold_sqrtsd_int
|
||||||
;CHECK-LABEL: stack_fold_sqrtsd_int
|
|
||||||
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
||||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
||||||
%2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
|
|
||||||
ret <2 x double> %2
|
|
||||||
}
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
|
||||||
|
|
||||||
define float @stack_fold_sqrtss(float %a0) {
|
define float @stack_fold_sqrtss(float %a0) {
|
||||||
;CHECK-LABEL: stack_fold_sqrtss
|
;CHECK-LABEL: stack_fold_sqrtss
|
||||||
|
@ -1714,14 +1691,7 @@ define float @stack_fold_sqrtss(float %a0) {
|
||||||
}
|
}
|
||||||
declare float @llvm.sqrt.f32(float) nounwind readnone
|
declare float @llvm.sqrt.f32(float) nounwind readnone
|
||||||
|
|
||||||
define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) {
|
; TODO stack_fold_sqrtss_int
|
||||||
;CHECK-LABEL: stack_fold_sqrtss_int
|
|
||||||
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
||||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
||||||
%2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
|
|
||||||
ret <4 x float> %2
|
|
||||||
}
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
|
||||||
|
|
||||||
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
;CHECK-LABEL: stack_fold_subpd
|
;CHECK-LABEL: stack_fold_subpd
|
||||||
|
|
Loading…
Reference in New Issue