forked from OSchip/llvm-project
[X86] Add masked versions of VPERMT2* and VPERMI2* to load folding tables.
llvm-svn: 289186
This commit is contained in:
parent
f74fcdd30c
commit
c4f2b0996d
|
@ -2675,11 +2675,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
|
||||
{ X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
|
||||
{ X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
|
||||
{ X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
|
||||
{ X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
|
||||
{ X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
|
||||
{ X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
|
||||
{ X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
|
||||
{ X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
|
||||
{ X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
|
||||
{ X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
|
||||
{ X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
|
||||
{ X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
|
||||
{ X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
|
||||
{ X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
|
||||
{ X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
|
||||
{ X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
|
||||
{ X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
|
||||
{ X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
|
||||
{ X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
|
||||
{ X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
|
||||
{ X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
|
||||
{ X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
|
||||
|
@ -2694,9 +2706,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
|
||||
{ X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
|
||||
{ X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
|
||||
{ X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
|
||||
{ X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
|
||||
{ X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
|
||||
{ X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
|
||||
{ X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
|
||||
{ X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
|
||||
|
@ -2755,11 +2765,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
|
||||
{ X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
|
||||
{ X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
|
||||
{ X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
|
||||
{ X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
|
||||
{ X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
|
||||
{ X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
|
||||
{ X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
|
||||
{ X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
|
||||
{ X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
|
||||
{ X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
|
||||
{ X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
|
||||
{ X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
|
||||
{ X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
|
||||
{ X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
|
||||
{ X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
|
||||
{ X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
|
||||
{ X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
|
||||
{ X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
|
||||
{ X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
|
||||
{ X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
|
||||
{ X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
|
||||
{ X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
|
||||
|
@ -2775,9 +2797,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
|
||||
{ X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
|
||||
{ X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
|
||||
{ X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
|
||||
{ X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
|
||||
{ X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
|
||||
{ X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
|
||||
{ X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
|
||||
{ X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
|
||||
|
@ -2831,8 +2851,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
|
||||
{ X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
|
||||
{ X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
|
||||
{ X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
|
||||
{ X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
|
||||
{ X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
|
||||
{ X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
|
||||
{ X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
|
||||
{ X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
|
||||
{ X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
|
||||
{ X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
|
||||
{ X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
|
||||
{ X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
|
||||
{ X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
|
||||
{ X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
|
||||
{ X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
|
||||
{ X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
|
||||
{ X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
|
||||
{ X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
|
||||
{ X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
|
||||
|
@ -2848,9 +2880,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
|
||||
{ X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
|
||||
{ X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
|
||||
{ X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
|
||||
{ X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
|
||||
{ X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
|
||||
{ X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
|
||||
{ X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
|
||||
{ X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
|
||||
|
@ -2869,6 +2899,54 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
|
||||
{ X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
|
||||
{ X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
|
||||
|
||||
// 512-bit three source instructions with zero masking.
|
||||
{ X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
|
||||
{ X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
|
||||
{ X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
|
||||
{ X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
|
||||
{ X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
|
||||
{ X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
|
||||
{ X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
|
||||
{ X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
|
||||
{ X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
|
||||
{ X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
|
||||
{ X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
|
||||
{ X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
|
||||
{ X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
|
||||
{ X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
|
||||
|
||||
// 256-bit three source instructions with zero masking.
|
||||
{ X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
|
||||
{ X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
|
||||
{ X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
|
||||
{ X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
|
||||
{ X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
|
||||
{ X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
|
||||
{ X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
|
||||
{ X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
|
||||
{ X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
|
||||
{ X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
|
||||
{ X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
|
||||
{ X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
|
||||
{ X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
|
||||
{ X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
|
||||
|
||||
// 128-bit three source instructions with zero masking.
|
||||
{ X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
|
||||
{ X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
|
||||
{ X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
|
||||
{ X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
|
||||
{ X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
|
||||
{ X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
|
||||
{ X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
|
||||
{ X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
|
||||
{ X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
|
||||
{ X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
|
||||
{ X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
|
||||
{ X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
|
||||
{ X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
|
||||
{ X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
|
||||
};
|
||||
|
||||
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
|
||||
|
|
|
@ -534,6 +534,34 @@ define <16 x float> @stack_fold_vpermi2ps(<16 x i32> %x0, <16 x float> %x1, <16
|
|||
}
|
||||
; Declaration of the masked vpermt2var.ps.512 intrinsic: takes a <16 x i32>
; vector, two <16 x float> vectors and an i16 mask, and returns <16 x float>.
; It is called by stack_fold_vpermt2ps_mask further down in this test.
declare <16 x float> @llvm.x86.avx512.mask.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
|
||||
|
||||
; Stack-folding test for the masked form of vpermi2ps (512-bit).
;   %x0, %x2 - <16 x float> operands passed as the first and third intrinsic
;              arguments.
;   %x1      - pointer to a <16 x i32> vector; it is loaded after the inline
;              asm and passed as the second intrinsic argument.
;   %mask    - i16 mask passed as the last intrinsic argument.
; The FileCheck directives below expect a vpermi2ps whose first operand is an
; (%rsp)-relative memory slot, with a {%k} write-mask and no {z} suffix,
; annotated as a "64-byte Folded Reload".
define <16 x float> @stack_fold_vpermi2ps_mask(<16 x float> %x0, <16 x i32>* %x1, <16 x float> %x2, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_vpermi2ps_mask
|
||||
;CHECK: vpermi2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
; Side-effecting nop that clobbers xmm2-xmm31 and flags. Presumably this
; raises register pressure so that a live vector value is spilled across the
; asm and later reloaded (folded) from the stack -- confirm against the RUN line.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
; The <16 x i32> operand is read from memory only after the asm above.
%x1b = load <16 x i32>, <16 x i32>* %x1
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1b, <16 x float> %x2, i16 %mask)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; Stack-folding test for the masked form of vpermt2ps (512-bit).
;   %x0      - pointer to a <16 x i32> vector; it is loaded after the inline
;              asm and passed as the first intrinsic argument.
;   %x1, %x2 - <16 x float> operands passed as the second and third intrinsic
;              arguments.
;   %mask    - i16 mask passed as the last intrinsic argument.
; The FileCheck directives below expect a vpermt2ps whose first operand is an
; (%rsp)-relative memory slot, with a {%k} write-mask and no {z} suffix,
; annotated as a "64-byte Folded Reload".
define <16 x float> @stack_fold_vpermt2ps_mask(<16 x i32>* %x0, <16 x float> %x1, <16 x float> %x2, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_vpermt2ps_mask
|
||||
;CHECK: vpermt2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
; Side-effecting nop that clobbers xmm2-xmm31 and flags. Presumably this
; raises register pressure so that a live vector value is spilled across the
; asm and later reloaded (folded) from the stack -- confirm against the RUN line.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
; The <16 x i32> operand is read from memory only after the asm above.
%x0b = load <16 x i32>, <16 x i32>* %x0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermt2var.ps.512(<16 x i32> %x0b, <16 x float> %x1, <16 x float> %x2, i16 %mask)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; Stack-folding test for the zero-masked form of vpermt2ps (512-bit).
;   %x0      - pointer to a <16 x i32> vector; it is loaded after the inline
;              asm and passed as the first intrinsic argument.
;   %x1, %x2 - <16 x float> operands passed as the second and third intrinsic
;              arguments.
;   %mask    - i16 mask passed as the last intrinsic argument.
; Unlike stack_fold_vpermt2ps_mask, this calls the maskz intrinsic, and the
; FileCheck directives below additionally require the {z} suffix after the
; {%k} write-mask on the folded-reload vpermt2ps.
define <16 x float> @stack_fold_vpermt2ps_maskz(<16 x i32>* %x0, <16 x float> %x1, <16 x float> %x2, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_vpermt2ps_maskz
|
||||
;CHECK: vpermt2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
; Side-effecting nop that clobbers xmm2-xmm31 and flags. Presumably this
; raises register pressure so that a live vector value is spilled across the
; asm and later reloaded (folded) from the stack -- confirm against the RUN line.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
; The <16 x i32> operand is read from memory only after the asm above.
%x0b = load <16 x i32>, <16 x i32>* %x0
|
||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0b, <16 x float> %x1, <16 x float> %x2, i16 %mask)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
; Declaration of the zero-masking (maskz) vpermt2var.ps.512 intrinsic: takes a
; <16 x i32> vector, two <16 x float> vectors and an i16 mask, and returns
; <16 x float>. It is called by stack_fold_vpermt2ps_maskz above.
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
|
||||
|
||||
define <8 x double> @stack_fold_vpermt2pd(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
|
||||
;CHECK-LABEL: stack_fold_vpermt2pd
|
||||
;CHECK: vpermt2pd {{-?[0-9]*}}(%rsp), %zmm1, %zmm0 # 64-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue