forked from OSchip/llvm-project
[AVX-512] Add VPABSB/D/Q/W to load folding tables.
llvm-svn: 294169
This commit is contained in:
parent
864b1a5376
commit
f0eb60a6f3
|
@ -881,8 +881,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
|
||||
{ X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
|
||||
{ X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
|
||||
{ X86::VPABSBZrr, X86::VPABSBZrm, 0 },
|
||||
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
|
||||
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
|
||||
{ X86::VPABSWZrr, X86::VPABSWZrm, 0 },
|
||||
{ X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
|
||||
{ X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
|
||||
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
|
||||
|
@ -916,6 +918,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
|
||||
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
|
||||
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
|
||||
{ X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
|
||||
{ X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
|
||||
{ X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
|
||||
{ X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
|
||||
{ X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
|
||||
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
|
||||
{ X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
|
||||
|
@ -948,6 +954,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
|
||||
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
|
||||
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
|
||||
{ X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
|
||||
{ X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
|
||||
{ X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
|
||||
{ X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
|
||||
{ X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
|
||||
{ X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
|
||||
{ X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
|
||||
|
@ -2139,6 +2149,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
// AVX-512 masked foldable instructions
|
||||
{ X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
|
||||
{ X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
|
||||
{ X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
|
||||
{ X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
|
||||
{ X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
|
||||
{ X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
|
||||
{ X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
|
||||
{ X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
|
||||
|
@ -2162,6 +2176,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
// AVX-512VL 256-bit masked foldable instructions
|
||||
{ X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
|
||||
{ X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
|
||||
{ X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
|
||||
{ X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
|
||||
{ X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
|
||||
{ X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
|
||||
{ X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
|
||||
{ X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
|
||||
|
@ -2184,6 +2202,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
|
||||
// AVX-512VL 128-bit masked foldable instructions
|
||||
{ X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
|
||||
{ X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
|
||||
{ X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
|
||||
{ X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
|
||||
{ X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
|
||||
{ X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
|
||||
{ X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
|
||||
|
@ -2601,6 +2623,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
// AVX-512 masked foldable instructions
|
||||
{ X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
|
||||
{ X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
|
||||
{ X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
|
||||
{ X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
|
||||
{ X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
|
||||
{ X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
|
||||
{ X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
|
||||
{ X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
|
||||
|
@ -2624,6 +2650,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
// AVX-512VL 256-bit masked foldable instructions
|
||||
{ X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
|
||||
{ X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
|
||||
{ X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
|
||||
{ X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
|
||||
{ X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
|
||||
{ X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
|
||||
{ X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
|
||||
{ X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
|
||||
|
@ -2646,6 +2676,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
|
||||
// AVX-512VL 128-bit masked foldable instructions
|
||||
{ X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
|
||||
{ X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
|
||||
{ X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
|
||||
{ X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
|
||||
{ X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
|
||||
{ X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
|
||||
{ X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
|
||||
{ X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
|
||||
|
|
|
@ -8,6 +8,106 @@ target triple = "x86_64-unknown-unknown"
|
|||
; By including a nop call with sideeffects we can force a partial register spill of the
|
||||
; relevant registers and check that the reload is correctly folded into the instruction.
|
||||
|
||||
define <64 x i8> @stack_fold_pabsb(<64 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> undef, i64 -1)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) nounwind readnone
|
||||
|
||||
define <64 x i8> @stack_fold_pabsb_mask(<64 x i8> %passthru, <64 x i8> %a0, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsb_mask
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
|
||||
define <64 x i8> @stack_fold_pabsb_maskz(<64 x i8> %a0, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsb_maskz
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %a0, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
|
||||
define <16 x i32> @stack_fold_pabsd(<16 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsd
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> undef, i16 -1)
|
||||
ret <16 x i32> %2
|
||||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) nounwind readnone
|
||||
|
||||
define <16 x i32> @stack_fold_pabsd_mask(<16 x i32> %passthru, <16 x i32> %a0, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsd_mask
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask)
|
||||
ret <16 x i32> %2
|
||||
}
|
||||
|
||||
define <16 x i32> @stack_fold_pabsd_maskz(<16 x i32> %a0, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsd_maskz
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i64> @stack_fold_pabsq(<8 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsq
|
||||
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> undef, i8 -1)
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) nounwind readnone
|
||||
|
||||
define <8 x i64> @stack_fold_pabsq_mask(<8 x i64> %passthru, <8 x i64> %a0, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsq_mask
|
||||
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask)
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
|
||||
define <8 x i64> @stack_fold_pabsq_maskz(<8 x i64> %a0, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsq_maskz
|
||||
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
|
||||
define <32 x i16> @stack_fold_pabsw(<32 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsw
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> undef, i32 -1)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) nounwind readnone
|
||||
|
||||
define <32 x i16> @stack_fold_pabsw_mask(<32 x i16> %passthru, <32 x i16> %a0, i32 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsw_mask
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
|
||||
define <32 x i16> @stack_fold_pabsw_maskz(<32 x i16> %a0, i32 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pabsw_maskz
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
|
||||
define <64 x i8> @stack_fold_paddb(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_paddb
|
||||
;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
|
|
|
@ -8,6 +8,78 @@ target triple = "x86_64-unknown-unknown"
|
|||
; By including a nop call with sideeffects we can force a partial register spill of the
|
||||
; relevant registers and check that the reload is correctly folded into the instruction.
|
||||
|
||||
define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
|
||||
|
||||
define <32 x i8> @stack_fold_pabsb_ymm(<32 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb_ymm
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsd
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i32> @stack_fold_pabsd_ymm(<8 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsd_ymm
|
||||
;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pabsq(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsq
|
||||
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %a0, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x i64> @stack_fold_pabsq_ymm(<4 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsq_ymm
|
||||
;CHECK: vpabsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %a0, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsw
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @stack_fold_pabsw_ymm(<16 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsw_ymm
|
||||
;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_paddb
|
||||
;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue