forked from OSchip/llvm-project
[X86][AVX] Added (V)MOVDDUP / (V)MOVSLDUP / (V)MOVSHDUP memory folding + tests.
Minor tweak: now that D7042 is complete, we can enable stack folding for (V)MOVDDUP and do proper testing. Added missing AVX ymm folding patterns and fixed alignment for AVX VMOVSLDUP / VMOVSHDUP. llvm-svn: 226873
This commit is contained in:
parent
c976e8eef4
commit
7e6d573e87
|
@ -572,8 +572,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
|
||||
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
|
||||
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
|
||||
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
|
||||
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
|
||||
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
|
||||
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
|
||||
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
|
||||
|
@ -629,7 +629,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
|
||||
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
|
||||
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
|
||||
{ X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
|
||||
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
|
||||
{ X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
|
||||
{ X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
|
||||
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
|
||||
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
|
||||
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
|
||||
|
|
|
@ -1085,11 +1085,21 @@ define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
|
|||
}
|
||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_movd (load / store)
|
||||
; TODO stack_fold_movq (load / store)
|
||||
define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movddup
|
||||
;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_movddup
|
||||
; TODO stack_fold_movddup_ymm
|
||||
define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movddup_ymm
|
||||
;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
ret <4 x double> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_movhpd (load / store)
|
||||
; TODO stack_fold_movhps (load / store)
|
||||
|
@ -1097,9 +1107,6 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read
|
|||
; TODO stack_fold_movlpd (load / store)
|
||||
; TODO stack_fold_movlps (load / store)
|
||||
|
||||
; TODO stack_fold_movsd (load / store)
|
||||
; TODO stack_fold_movss (load / store)
|
||||
|
||||
define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movshdup
|
||||
;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
|
@ -1108,7 +1115,13 @@ define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
|
|||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_movshdup_ymm
|
||||
define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movshdup_ymm
|
||||
;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
|
||||
ret <8 x float> %2
|
||||
}
|
||||
|
||||
define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movsldup
|
||||
|
@ -1118,7 +1131,13 @@ define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
|
|||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_movshdup_ymm
|
||||
define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movsldup_ymm
|
||||
;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
||||
ret <8 x float> %2
|
||||
}
|
||||
|
||||
define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
|
||||
;CHECK-LABEL: stack_fold_mulpd
|
||||
|
|
|
@ -737,20 +737,19 @@ define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
|
|||
}
|
||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_movd (load / store)
|
||||
; TODO stack_fold_movq (load / store)
|
||||
|
||||
; TODO stack_fold_movddup
|
||||
|
||||
define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movddup
|
||||
;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %2
|
||||
}
|
||||
; TODO stack_fold_movhpd (load / store)
|
||||
; TODO stack_fold_movhps (load / store)
|
||||
|
||||
; TODO stack_fold_movlpd (load / store)
|
||||
; TODO stack_fold_movlps (load / store)
|
||||
|
||||
; TODO stack_fold_movsd (load / store)
|
||||
; TODO stack_fold_movss (load / store)
|
||||
|
||||
define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movshdup
|
||||
;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue