forked from OSchip/llvm-project
[AVX-512] Add PAVGB/PAVGW to load folding tables.
llvm-svn: 295035
This commit is contained in:
parent
f1b3fc7356
commit
d2d50cba2a
|
@ -1907,6 +1907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
|
||||
{ X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
|
||||
{ X86::VPANDQZrr, X86::VPANDQZrm, 0 },
|
||||
{ X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
|
||||
{ X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
|
||||
{ X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
|
||||
{ X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
|
||||
{ X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
|
||||
|
@ -2092,6 +2094,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
|
||||
{ X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
|
||||
{ X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
|
||||
{ X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
|
||||
{ X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
|
||||
{ X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
|
||||
{ X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
|
||||
{ X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
|
||||
{ X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
|
||||
{ X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
|
||||
|
@ -2569,6 +2575,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
|
||||
{ X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
|
||||
{ X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
|
||||
{ X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
|
||||
{ X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
|
||||
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
|
||||
{ X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
|
||||
{ X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
|
||||
|
@ -2692,6 +2700,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
|
||||
{ X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
|
||||
{ X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
|
||||
{ X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
|
||||
{ X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
|
||||
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
|
||||
{ X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
|
||||
{ X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
|
||||
|
@ -2809,6 +2819,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
|
||||
{ X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
|
||||
{ X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
|
||||
{ X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
|
||||
{ X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
|
||||
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
|
||||
{ X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
|
||||
{ X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
|
||||
|
@ -3070,6 +3082,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
|
||||
{ X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
|
||||
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
|
||||
{ X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
|
||||
{ X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
|
||||
{ X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
|
||||
{ X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
|
||||
{ X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
|
||||
|
@ -3206,6 +3220,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
|
||||
{ X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
|
||||
{ X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
|
||||
{ X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
|
||||
{ X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
|
||||
{ X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
|
||||
{ X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
|
||||
{ X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
|
||||
|
@ -3337,6 +3353,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
|
||||
{ X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
|
||||
{ X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
|
||||
{ X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
|
||||
{ X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
|
||||
{ X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
|
||||
{ X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
|
||||
{ X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
|
||||
|
|
|
@ -66,6 +66,58 @@ define <8 x i64> @stack_fold_valignq_maskz(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
|||
ret <8 x i64> %4
|
||||
}
|
||||
|
||||
; Unmasked 512-bit vpavgb test: calls the masked pavg.b.512 intrinsic with an
; undef passthru and an all-ones (i64 -1) mask.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm against the other stack-folding tests.
; The FileCheck pattern below expects vpavgb to read its memory operand from
; (%rsp) and to be annotated as a 64-byte folded reload.
define <64 x i8> @stack_fold_pavgb(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgb
|
||||
;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> undef, i64 -1)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readnone
|
||||
|
||||
; Merge-masked 512-bit vpavgb test: the passthru vector is loaded from the
; %passthru pointer after the inline asm and passed, together with %mask, to
; the masked pavg.b.512 intrinsic.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgb with a (%rsp) memory operand, a
; {%kN} write-mask annotation, and a 64-byte folded reload.
define <64 x i8> @stack_fold_pavgb_mask(<64 x i8>* %passthru, <64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pavgb_mask
|
||||
;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = load <64 x i8>, <64 x i8>* %passthru
|
||||
%3 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %2, i64 %mask)
|
||||
ret <64 x i8> %3
|
||||
}
|
||||
|
||||
; Zero-masked 512-bit vpavgb test: calls the masked pavg.b.512 intrinsic with a
; zeroinitializer passthru and the caller-supplied %mask.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgb with a (%rsp) memory operand, a
; {%kN} mask followed by {z}, and a 64-byte folded reload.
define <64 x i8> @stack_fold_pavgb_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pavgb_maskz
|
||||
;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
|
||||
; Unmasked 512-bit vpavgw test: calls the masked pavg.w.512 intrinsic with an
; undef passthru and an all-ones (i32 -1) mask.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgw to read its memory operand from
; (%rsp) and to be annotated as a 64-byte folded reload.
define <32 x i16> @stack_fold_pavgw(<32 x i16> %a0, <32 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgw
|
||||
;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> undef, i32 -1)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readnone
|
||||
|
||||
; Merge-masked 512-bit vpavgw test: the passthru vector is loaded from the
; %passthru pointer after the inline asm and passed, together with %mask, to
; the masked pavg.w.512 intrinsic.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgw with a (%rsp) memory operand, a
; {%kN} write-mask annotation, and a 64-byte folded reload.
define <32 x i16> @stack_fold_pavgw_mask(<32 x i16>* %passthru, <32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pavgw_mask
|
||||
;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = load <32 x i16>, <32 x i16>* %passthru
|
||||
%3 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %2, i32 %mask)
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
; Zero-masked 512-bit vpavgw test: calls the masked pavg.w.512 intrinsic with a
; zeroinitializer passthru and the caller-supplied %mask.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgw with a (%rsp) memory operand, a
; {%kN} mask followed by {z}, and a 64-byte folded reload.
define <32 x i16> @stack_fold_pavgw_maskz(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pavgw_maskz
|
||||
;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_extracti32x4(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_extracti32x4
|
||||
;CHECK: vextracti32x4 $3, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
|
||||
|
|
|
@ -45,6 +45,42 @@ define <4 x i64> @stack_fold_valignq_ymm(<4 x i64> %a, <4 x i64> %b) {
|
|||
ret <4 x i64> %2
|
||||
}
|
||||
|
||||
; 128-bit vpavgb test: calls the llvm.x86.sse2.pavg.b intrinsic on two
; <16 x i8> arguments.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgb on xmm registers with a (%rsp)
; memory operand annotated as a 16-byte folded reload.
define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgb
|
||||
;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; 256-bit vpavgb test: calls the llvm.x86.avx2.pavg.b intrinsic on two
; <32 x i8> arguments.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgb on ymm registers with a (%rsp)
; memory operand annotated as a 32-byte folded reload.
define <32 x i8> @stack_fold_pavgb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgb_ymm
|
||||
;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
; 128-bit vpavgw test: calls the llvm.x86.sse2.pavg.w intrinsic on two
; <8 x i16> arguments.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgw on xmm registers with a (%rsp)
; memory operand annotated as a 16-byte folded reload.
define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgw
|
||||
;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; 256-bit vpavgw test: calls the llvm.x86.avx2.pavg.w intrinsic on two
; <16 x i16> arguments.
; The "nop" inline asm lists xmm2-xmm31 and flags as clobbers and its result %1
; is unused; presumably it exists to create register pressure so that one
; operand is spilled -- TODO confirm.
; The FileCheck pattern below expects vpavgw on ymm registers with a (%rsp)
; memory operand annotated as a 32-byte folded reload.
define <16 x i16> @stack_fold_pavgw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pavgw_ymm
|
||||
;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_extracti32x4(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_extracti32x4
|
||||
;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
|
||||
|
|
Loading…
Reference in New Issue