[X86] Add broadcast load folding patterns to the NoVLX compare patterns.
These patterns use zmm registers for 128/256-bit compares when the VLX instructions aren't available. Previously we only supported register operands; as PR36191 notes, we can fold broadcast loads, but not regular loads.

llvm-svn: 373423
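As a rough illustration of the effect, consider a reduced IR example modeled on test38 in the compare tests updated below (the function name here is made up for illustration; it is not one of the checked-in tests). On an AVX512F target without VLX, an fcmp against a splatted load previously required a separate broadcast into a register before the widened zmm compare; with these patterns the load folds into the compare's {1to8} broadcast memory operand.

define <4 x double> @cmp_broadcast(<4 x double> %x, <4 x double> %x1, double* %ptr) {
  %a = load double, double* %ptr
  ; splat the loaded scalar across all four lanes
  %vec = insertelement <4 x double> undef, double %a, i32 0
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> zeroinitializer
  %mask = fcmp olt <4 x double> %x, %shuf
  %res = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %res
}

Per the test38 diff below, codegen for this shape changes from "vbroadcastsd (%rdi), %ymm2" followed by "vcmpltpd %zmm2, %zmm0, %k1" to the single folded "vcmpltpd (%rdi){1to8}, %zmm0, %k1".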
@@ -2392,7 +2392,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                                     (_.VT _.RC:$src1), cond))),
             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
-             (CommFrag.OperandTransform $cc))>;
+             (CommFrag_su.OperandTransform $cc))>;
 }
 
 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
@@ -3172,6 +3172,30 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                              Narrow.KRC)>;
 }
 
+multiclass axv512_icmp_packed_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
+                                                  string InstStr,
+                                                  X86VectorVTInfo Narrow,
+                                                  X86VectorVTInfo Wide> {
+  // Broadcast load.
+  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
+                              (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))),
+            (COPY_TO_REGCLASS
+             (!cast<Instruction>(InstStr#"Zrmb")
+              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+              addr:$src2),
+             Narrow.KRC)>;
+
+  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                             (Frag_su (Narrow.VT Narrow.RC:$src1),
+                                      (Narrow.BroadcastLdFrag addr:$src2)))),
+            (COPY_TO_REGCLASS
+             (!cast<Instruction>(InstStr#"Zrmbk")
+              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+              addr:$src2),
+             Narrow.KRC)>;
+}
+
 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                  string InstStr,
@@ -3180,7 +3204,7 @@ multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                 (Narrow.VT Narrow.RC:$src2), cond)),
           (COPY_TO_REGCLASS
-           (!cast<Instruction>(InstStr##Zrri)
+           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;
@@ -3189,34 +3213,108 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                        (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                 (Narrow.VT Narrow.RC:$src2),
                                                 cond)))),
-          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
-          (Frag.OperandTransform $cc)), Narrow.KRC)>;
+          (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
 }
 
+multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
+                                                     PatFrag CommFrag, PatFrag CommFrag_su,
+                                                     string InstStr,
+                                                     X86VectorVTInfo Narrow,
+                                                     X86VectorVTInfo Wide> {
+// Broadcast load.
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
+                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
+          (COPY_TO_REGCLASS
+           (!cast<Instruction>(InstStr#"Zrmib")
+            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                       (Narrow.KVT
+                        (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
+                                     (Narrow.BroadcastLdFrag addr:$src2),
+                                     cond)))),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
+           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
+
+// Commuted with broadcast load.
+def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
+                                    (Narrow.VT Narrow.RC:$src1),
+                                    cond)),
+          (COPY_TO_REGCLASS
+           (!cast<Instruction>(InstStr#"Zrmib")
+            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                       (Narrow.KVT
+                        (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
+                                         (Narrow.VT Narrow.RC:$src1),
+                                         cond)))),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
+           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
+}
+
 // Same as above, but for fp types which don't use PatFrags.
-multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
-                                                string InstStr,
+multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
-                              (Narrow.VT Narrow.RC:$src2), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
           (COPY_TO_REGCLASS
-           (!cast<Instruction>(InstStr##Zrri)
+           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;
 
 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                       (OpNode_su (Narrow.VT Narrow.RC:$src1),
-                                  (Narrow.VT Narrow.RC:$src2), timm:$cc))),
-          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
+                       (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
+                                   (Narrow.VT Narrow.RC:$src2), timm:$cc))),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;
+
+// Broadcast load.
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+          (COPY_TO_REGCLASS
+           (!cast<Instruction>(InstStr#"Zrmbi")
+            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+            addr:$src2, timm:$cc), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                       (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
+                                   (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
+           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+           addr:$src2, timm:$cc), Narrow.KRC)>;
+
+// Commuted with broadcast load.
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+          (COPY_TO_REGCLASS
+           (!cast<Instruction>(InstStr#"Zrmbi")
+            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                       (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+                                   (Narrow.VT Narrow.RC:$src1), timm:$cc))),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
+           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
 }
 
 let Predicates = [HasAVX512, NoVLX] in {
@@ -3234,6 +3332,18 @@ let Predicates = [HasAVX512, NoVLX] in {
   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
 
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
+
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
+
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
+
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
   }
 
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
@@ -3248,10 +3358,22 @@ let Predicates = [HasAVX512, NoVLX] in {
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
 
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
+
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
+
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
+
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
+
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
 }
 
 let Predicates = [HasBWI, NoVLX] in {

@@ -861,8 +861,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17]
-; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
+; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq ## encoding: [0xc3]
@@ -887,8 +886,7 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* %
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17]
-; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
+; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq ## encoding: [0xc3]
@@ -913,9 +911,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17]
-; AVX512-NEXT: ## xmm2 = mem[0,0]
-; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
+; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@@ -941,9 +937,7 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* %
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17]
-; AVX512-NEXT: ## xmm2 = mem[0,0]
-; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
+; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@@ -1002,8 +996,7 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17]
-; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
+; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq ## encoding: [0xc3]
@@ -1028,8 +1021,7 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %p
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17]
-; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
+; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq ## encoding: [0xc3]
@@ -1054,8 +1046,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17]
-; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
+; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@@ -1081,8 +1072,7 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %p
 ; AVX512: ## %bb.0:
 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17]
-; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
+; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]

@@ -378,8 +378,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind
 ; NoVLX: # %bb.0:
 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
-; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
+; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -402,8 +401,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind
 ; NoVLX: # %bb.0:
 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2
-; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
+; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -429,8 +427,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1
-; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -458,8 +455,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64
 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1
-; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -937,8 +933,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind
 ; NoVLX: # %bb.0:
 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
-; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
+; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -961,8 +956,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind
 ; NoVLX: # %bb.0:
 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
-; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
+; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -988,8 +982,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1
-; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
-; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT: retq
@@ -1017,8 +1010,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64
 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1
-; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
-; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT: retq

File diff suppressed because it is too large

@@ -608,10 +608,8 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
 ; AVX512F: # %bb.0: # %bb
 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
-; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
-; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k2
+; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k1
+; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k2
 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k2}
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
 ; AVX512F-NEXT: korw %k0, %k1, %k1

@@ -156,8 +156,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
 ; AVX512F-LABEL: trunc_usat_v4i64_v4i32:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
-; AVX512F-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
 ; AVX512F-NEXT: vpmovqd %zmm1, %ymm0
@@ -177,8 +176,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
 ; AVX512BW-LABEL: trunc_usat_v4i64_v4i32:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
-; AVX512BW-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
 ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
 ; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0