[X86] Add broadcast load folding patterns to the NoVLX compare patterns.

These patterns use zmm registers for 128/256-bit compares when
the VLX instructions aren't available. Previously we only
supported register operands; as PR36191 notes, we can fold
broadcast loads (though not regular loads) for these patterns.

llvm-svn: 373423
This commit is contained in:
Craig Topper 2019-10-02 04:45:02 +00:00
parent c3aab6eaaa
commit 8d6a863b02
6 changed files with 318 additions and 378 deletions

View File

@ -2392,7 +2392,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
(CommFrag.OperandTransform $cc))>;
(CommFrag_su.OperandTransform $cc))>;
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
@ -3172,6 +3172,30 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
Narrow.KRC)>;
}
multiclass axv512_icmp_packed_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmb")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2),
Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Frag_su (Narrow.VT Narrow.RC:$src1),
(Narrow.BroadcastLdFrag addr:$src2)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2),
Narrow.KRC)>;
}
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
@ -3180,7 +3204,7 @@ multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrri)
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
(Frag.OperandTransform $cc)), Narrow.KRC)>;
@ -3189,34 +3213,108 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
(Frag.OperandTransform $cc)), Narrow.KRC)>;
(Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
PatFrag CommFrag, PatFrag CommFrag_su,
string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.BroadcastLdFrag addr:$src2), cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmib")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT
(Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.BroadcastLdFrag addr:$src2),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
(Narrow.VT Narrow.RC:$src1),
cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmib")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT
(CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
(Narrow.VT Narrow.RC:$src1),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
string InstStr,
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrri)
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
timm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(OpNode_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(X86cmpm_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
timm:$cc), Narrow.KRC)>;
// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, timm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(X86cmpm_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
let Predicates = [HasAVX512, NoVLX] in {
@ -3234,6 +3332,18 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
}
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
@ -3248,10 +3358,22 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}
let Predicates = [HasBWI, NoVLX] in {

View File

@ -861,8 +861,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17]
; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
@ -887,8 +886,7 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* %
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17]
; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
@ -913,9 +911,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17]
; AVX512-NEXT: ## xmm2 = mem[0,0]
; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@ -941,9 +937,7 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* %
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17]
; AVX512-NEXT: ## xmm2 = mem[0,0]
; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@ -1002,8 +996,7 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17]
; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
@ -1028,8 +1021,7 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %p
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17]
; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
@ -1054,8 +1046,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17]
; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
@ -1081,8 +1072,7 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %p
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17]
; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]

View File

@ -378,8 +378,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; NoVLX-NEXT: retq
@ -402,8 +401,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2
; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; NoVLX-NEXT: retq
@ -429,8 +427,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; NoVLX-NEXT: retq
@ -458,8 +455,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; NoVLX-NEXT: retq
@ -937,8 +933,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; NoVLX-NEXT: retq
@ -961,8 +956,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; NoVLX-NEXT: retq
@ -988,8 +982,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; NoVLX-NEXT: retq
@ -1017,8 +1010,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; NoVLX-NEXT: retq

File diff suppressed because it is too large Load Diff

View File

@ -608,10 +608,8 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
; AVX512F: # %bb.0: # %bb
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k2
; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k2
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT: korw %k0, %k1, %k1

View File

@ -156,8 +156,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512F-LABEL: trunc_usat_v4i64_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX512F-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vpmovqd %zmm1, %ymm0
@ -177,8 +176,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512BW-LABEL: trunc_usat_v4i64_v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX512BW-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0