forked from OSchip/llvm-project
[X86] Make masked pcmpeq commutable during isel so we can fold loads in the other operand to the shorter encoding.
Previously we used the immediate encoding if the load was in operand 0 and the short encoding if the load was in operand 1. That approach added a very large number of bytes to the size of the isel table. I'm wondering if we should always use the immediate form during isel and change to the short form during emission. This would remove the need to pattern match every combination for both the immediate form and the short form during isel. We could do the same with vpcmpgt. llvm-svn: 325456
This commit is contained in:
parent
b824050658
commit
1040f236a3
|
@ -2190,9 +2190,9 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
|
||||
(X86cmpm node:$src1, node:$src2, (i8 0))>;
|
||||
(X86cmpm_c node:$src1, node:$src2, (i8 0))>;
|
||||
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
|
||||
(X86cmpm node:$src1, node:$src2, (i8 4))>;
|
||||
(X86cmpm_c node:$src1, node:$src2, (i8 4))>;
|
||||
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
|
||||
(X86cmpm node:$src1, node:$src2, (i8 6))>;
|
||||
|
||||
|
|
|
@ -171,6 +171,8 @@ def X86CmpMaskCCScalarRound :
|
|||
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
|
||||
|
||||
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
|
||||
// Hack to make CMPM commutable in tablegen patterns for load folding.
|
||||
def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>;
|
||||
def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
|
||||
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
|
||||
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
|
||||
|
|
|
@ -1081,7 +1081,7 @@ define i16 @pcmpeq_mem_1(<16 x i32> %a, <16 x i32>* %b) {
|
|||
define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
|
||||
; KNL-LABEL: pcmpeq_mem_2:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00]
|
||||
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
|
||||
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
|
@ -1089,7 +1089,7 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
|
|||
;
|
||||
; AVX512BW-LABEL: pcmpeq_mem_2:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00]
|
||||
; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
|
@ -1097,7 +1097,7 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
|
|||
;
|
||||
; SKX-LABEL: pcmpeq_mem_2:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00]
|
||||
; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
|
||||
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
|
|
Loading…
Reference in New Issue