forked from OSchip/llvm-project
[AVX-512] Add masked shift instructions to load folding tables.
This adds the masked versions of everything except the shift-by-immediate instructions. llvm-svn: 294286
This commit is contained in:
parent
45d9ddc687
commit
62304d80e3
|
@ -2478,6 +2478,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
|
||||
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
|
||||
{ X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
|
||||
{ X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
|
||||
{ X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
|
||||
{ X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
|
||||
{ X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
|
||||
{ X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
|
||||
{ X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
|
||||
{ X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
|
||||
{ X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
|
||||
{ X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
|
||||
{ X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
|
||||
{ X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
|
||||
{ X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
|
||||
{ X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
|
||||
{ X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
|
||||
{ X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
|
||||
{ X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
|
||||
{ X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
|
||||
{ X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
|
||||
{ X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
|
||||
{ X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
|
||||
{ X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
|
||||
|
@ -2567,6 +2585,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
|
||||
{ X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
|
||||
{ X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
|
||||
{ X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
|
||||
{ X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
|
||||
{ X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
|
||||
{ X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
|
||||
{ X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
|
||||
{ X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
|
||||
{ X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
|
||||
{ X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
|
||||
{ X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
|
||||
{ X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
|
||||
{ X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
|
||||
{ X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
|
||||
{ X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
|
||||
{ X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
|
||||
{ X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
|
||||
{ X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
|
||||
{ X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
|
||||
{ X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
|
||||
{ X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
|
||||
{ X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
|
||||
{ X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
|
||||
|
@ -2646,6 +2682,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
|
||||
{ X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
|
||||
{ X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
|
||||
{ X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
|
||||
{ X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
|
||||
{ X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
|
||||
{ X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
|
||||
{ X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
|
||||
{ X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
|
||||
{ X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
|
||||
{ X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
|
||||
{ X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
|
||||
{ X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
|
||||
{ X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
|
||||
{ X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
|
||||
{ X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
|
||||
{ X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
|
||||
{ X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
|
||||
{ X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
|
||||
{ X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
|
||||
{ X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
|
||||
{ X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
|
||||
{ X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
|
||||
{ X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
|
||||
|
@ -2862,6 +2916,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
|
||||
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
|
||||
{ X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
|
||||
{ X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
|
||||
{ X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
|
||||
{ X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
|
||||
{ X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
|
||||
{ X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
|
||||
{ X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
|
||||
{ X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
|
||||
{ X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
|
||||
{ X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
|
||||
{ X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
|
||||
{ X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
|
||||
{ X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
|
||||
{ X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
|
||||
{ X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
|
||||
{ X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
|
||||
{ X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
|
||||
{ X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
|
||||
{ X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
|
||||
{ X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
|
||||
{ X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
|
||||
{ X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
|
||||
|
@ -2964,6 +3036,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
|
||||
{ X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
|
||||
{ X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
|
||||
{ X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
|
||||
{ X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
|
||||
{ X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
|
||||
{ X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
|
||||
{ X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
|
||||
{ X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
|
||||
{ X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
|
||||
{ X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
|
||||
{ X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
|
||||
{ X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
|
||||
{ X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
|
||||
{ X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
|
||||
{ X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
|
||||
{ X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
|
||||
{ X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
|
||||
{ X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
|
||||
{ X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
|
||||
{ X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
|
||||
{ X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
|
||||
{ X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
|
||||
{ X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
|
||||
|
@ -3057,6 +3147,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
|
||||
{ X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
|
||||
{ X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
|
||||
{ X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
|
||||
{ X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
|
||||
{ X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
|
||||
{ X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
|
||||
{ X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
|
||||
{ X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
|
||||
{ X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
|
||||
{ X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
|
||||
{ X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
|
||||
{ X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
|
||||
{ X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
|
||||
{ X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
|
||||
{ X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
|
||||
{ X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
|
||||
{ X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
|
||||
{ X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
|
||||
{ X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
|
||||
{ X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
|
||||
{ X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
|
||||
{ X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
|
||||
{ X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
|
||||
|
|
|
@ -1047,6 +1047,27 @@ define <16 x i32> @stack_fold_pslld(<16 x i32> %a0, <4 x i32> %a1) {
|
|||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
; Merge-masked variant of the vpslld stack-folding test.
; Shifts %a0 by the count in %a1 via the psll.d.512 intrinsic, then uses the
; i16 %mask (bitcast to <16 x i1>) to pick, per lane, between the shift result
; and the pass-through vector loaded from %passthru.
; The expected output is a vpslld that reads its shift count from a stack slot
; (16-byte folded reload) and writes a zmm register under a {%k} write mask
; with no {z} modifier.
define <16 x i32> @stack_fold_pslld_mask(<16 x i32>* %passthru, <16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pslld_mask
|
||||
;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" with one "=x" output that clobbers xmm2-xmm31 and flags.
; NOTE(review): presumably this creates the register pressure that forces an
; operand to be spilled and reloaded - confirm against the generated assembly.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
; The pass-through value is loaded after the asm statement, from memory.
%4 = load <16 x i32>, <16 x i32>* %passthru
|
||||
%5 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> %4
|
||||
ret <16 x i32> %5
|
||||
}
|
||||
|
||||
; Zero-masked variant of the vpslld stack-folding test.
; Shifts %a0 by the count in %a1 via the psll.d.512 intrinsic, then uses the
; i16 %mask (bitcast to <16 x i1>) to pick, per lane, between the shift result
; and zero.
; The expected output is a vpslld that reads its shift count from a stack slot
; (16-byte folded reload) and writes a zmm register under a {%k} write mask
; with the {z} modifier.
define <16 x i32> @stack_fold_pslld_maskz(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pslld_maskz
|
||||
;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" with one "=x" output that clobbers xmm2-xmm31 and flags.
; NOTE(review): presumably this creates the register pressure that forces an
; operand to be spilled and reloaded - confirm against the generated assembly.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
|
||||
ret <16 x i32> %4
|
||||
}
|
||||
|
||||
define <64 x i8> @stack_fold_pslldq(<64 x i8> %a, <64 x i8> %b) {
|
||||
;CHECK-LABEL: stack_fold_pslldq
|
||||
;CHECK: vpslldq $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
|
@ -1073,6 +1094,27 @@ define <16 x i32> @stack_fold_psllvd(<16 x i32> %a0, <16 x i32> %a1) {
|
|||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
|
||||
|
||||
; Merge-masked variant of the vpsllvd (per-element variable shift) stack-folding
; test.
; Shifts %a0 by the per-lane counts in %a1 via the psllv.d.512 intrinsic, then
; uses the i16 %mask (bitcast to <16 x i1>) to pick, per lane, between the shift
; result and the pass-through vector loaded from %passthru.
; The expected output is a vpsllvd that reads one source operand from a stack
; slot (64-byte folded reload) and writes a zmm register under a {%k} write mask
; with no {z} modifier.
define <16 x i32> @stack_fold_psllvd_mask(<16 x i32>* %passthru, <16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_psllvd_mask
|
||||
;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
; Side-effecting "nop" with one "=x" output that clobbers xmm2-xmm31 and flags.
; NOTE(review): presumably this creates the register pressure that forces an
; operand to be spilled and reloaded - confirm against the generated assembly.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
; The pass-through value is loaded after the asm statement, from memory.
%4 = load <16 x i32>, <16 x i32>* %passthru
|
||||
%5 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> %4
|
||||
ret <16 x i32> %5
|
||||
}
|
||||
|
||||
; Zero-masked variant of the vpsllvd (per-element variable shift) stack-folding
; test.
; Shifts %a0 by the per-lane counts in %a1 via the psllv.d.512 intrinsic, then
; uses the i16 %mask (bitcast to <16 x i1>) to pick, per lane, between the shift
; result and zero.
; The expected output is a vpsllvd that reads one source operand from a stack
; slot (64-byte folded reload) and writes a zmm register under a {%k} write mask
; with the {z} modifier.
define <16 x i32> @stack_fold_psllvd_maskz(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: stack_fold_psllvd_maskz
|
||||
;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
; Side-effecting "nop" with one "=x" output that clobbers xmm2-xmm31 and flags.
; NOTE(review): presumably this creates the register pressure that forces an
; operand to be spilled and reloaded - confirm against the generated assembly.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
|
||||
ret <16 x i32> %4
|
||||
}
|
||||
|
||||
define <8 x i64> @stack_fold_psllvq(<8 x i64> %a0, <8 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psllvq
|
||||
;CHECK: vpsllvq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue