[X86] Introduce X86SchedWriteWidths schedule wrapper for different vector widths.

We need to split most of the scheduler classes by vector width to remove more of the InstRW overrides. This patch should make that easier/tidier by allowing us to pass the X86SchedWriteWidths wrapper to multi-width multiclasses and then split as required.

I've included fields for Scl (scalar float/double), MMX (MMX integer), XMM, YMM and ZMM widths. These fields mostly share the same classes for now, but they should give us the flexibility we may need in the future.
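
For reference, the wrapper (added to X86Schedule.td in the third file below) is simply a record bundling one X86FoldableSchedWrite per width:

class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
                          X86FoldableSchedWrite s128,
                          X86FoldableSchedWrite s256,
                          X86FoldableSchedWrite s512> {
  X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
  X86FoldableSchedWrite MMX = sScl; // MMX operations (currently shares the scalar class).
  X86FoldableSchedWrite XMM = s128; // XMM operations.
  X86FoldableSchedWrite YMM = s256; // YMM operations.
  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}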

This patch replaces a representative set of SSE/AVX512 instruction cases but isn't exhaustive, as the change gets very noisy before we really need the functionality.
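
As a minimal usage sketch, condensed from the vpblendmb change below: a multi-width multiclass now takes the wrapper once and selects the per-width write itself, and callers pass a single wrapper such as SchedWriteVarBlend instead of separate 128/256-bit writes.

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;    // 512-bit form uses the ZMM write.
  let Predicates = [HasBWI, HasVLX] in {
  defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              EVEX_V256; // 256-bit form uses the YMM write.
  defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              EVEX_V128; // 128-bit form uses the XMM write.
  }
}

// Callers now pass one wrapper instead of per-width scheduler classes.
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;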

Differential Revision: https://reviews.llvm.org/D46266

llvm-svn: 331208
Author: Simon Pilgrim
Date: 2018-04-30 18:18:38 +00:00
parent 395ab2e212
commit 3c35408e48
3 changed files with 160 additions and 93 deletions

File: lib/Target/X86/X86InstrAVX512.td

@@ -1931,45 +1931,47 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
}
}
multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched128,
X86FoldableSchedWrite sched256,
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
defm Z : WriteFVarBlendask <opc, OpcodeStr, sched256, VTInfo.info512>,
WriteFVarBlendask_rmb <opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512;
defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128;
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
multiclass blendmask_bw<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched128,
X86FoldableSchedWrite sched256,
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasBWI] in
defm Z : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512;
defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128;
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", WriteFVarBlend, WriteFVarBlendY,
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", WriteFVarBlend, WriteFVarBlendY,
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", WriteVarBlend, WriteVarBlend,
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", WriteVarBlend, WriteVarBlend,
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", WriteVarBlend, WriteVarBlend,
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", WriteVarBlend, WriteVarBlend,
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
avx512vl_i16_info>, VEX_W;
//===----------------------------------------------------------------------===//
@@ -5508,43 +5510,43 @@ multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched,
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.ZMM, VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched,
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.YMM, VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched,
VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched,
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.XMM, VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched, v32i16_info>, EVEX_V512, VEX_WIG;
sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched, v16i16x_info>, EVEX_V256, VEX_WIG;
sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched, v8i16x_info>, EVEX_V128, VEX_WIG;
sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
}
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
@@ -5552,24 +5554,24 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
WriteVecShift>,
SchedWriteVecShift>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
WriteVecShift>, AVX512BIi8Base, EVEX_4V;
SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
WriteVecShift>,
SchedWriteVecShift>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
WriteVecShift>, AVX512BIi8Base, EVEX_4V;
SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
WriteVecShift>,
SchedWriteVecShift>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
WriteVecShift>, AVX512BIi8Base, EVEX_4V;
SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
WriteVecShift>, AVX512BIi8Base, EVEX_4V;
SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
WriteVecShift>, AVX512BIi8Base, EVEX_4V;
SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, WriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, WriteVecShift>;
@@ -5975,28 +5977,27 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
X86FoldableSchedWrite sched128,
X86FoldableSchedWrite sched256,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo Ctrl> {
let Predicates = [HasAVX512] in {
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256,
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
_.info512, Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched128,
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
_.info128, Ctrl.info128>, EVEX_V128;
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256,
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
_.info256, Ctrl.info256>, EVEX_V256;
}
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle,
WriteFVarShuffleY, _, Ctrl>;
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
_, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, WriteFShuffle, _>,
X86VPermilpi, SchedWriteFShuffle, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
@@ -6012,12 +6013,14 @@ defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
X86PShufd, WriteShuffle, avx512vl_i32_info>,
X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw, WriteShuffle>, EVEX, AVX512XSIi8Base;
X86PShufhw, SchedWriteShuffle>,
EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw, WriteShuffle>, EVEX, AVX512XDIi8Base;
X86PShuflw, SchedWriteShuffle>,
EVEX, AVX512XDIi8Base;
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched> {

File: lib/Target/X86/X86InstrSSE.td

@@ -2332,41 +2332,41 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, WriteFLogicY,
!strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
[], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, WriteFLogicY,
!strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
[], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
!strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
[], [], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
!strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
[], [], 0>, PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
!strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
[], []>, PS;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
!strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
[], []>, PD;
}
}
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
@@ -6053,42 +6053,42 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
VR128, loadv4f32, f128mem, 0, SSEPackedSingle,
WriteFBlend, BlendCommuteImm4>,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, loadv8f32, f256mem, 0, SSEPackedSingle,
WriteFBlendY, BlendCommuteImm8>,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, loadv2f64, f128mem, 0, SSEPackedDouble,
WriteFBlend, BlendCommuteImm2>,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, loadv4f64, f256mem, 0, SSEPackedDouble,
WriteFBlendY, BlendCommuteImm4>,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
VEX_4V, VEX_L, VEX_WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, loadv2i64, i128mem, 0, SSEPackedInt,
WriteBlend, BlendCommuteImm8>,
SchedWriteBlend.XMM, BlendCommuteImm8>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
VR256, loadv4i64, i256mem, 0, SSEPackedInt,
WriteBlend, BlendCommuteImm8>,
SchedWriteBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
}
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
VR128, memopv4f32, f128mem, 1, SSEPackedSingle,
WriteFBlend, BlendCommuteImm4>;
SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
VR128, memopv2f64, f128mem, 1, SSEPackedDouble,
WriteFBlend, BlendCommuteImm2>;
SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
VR128, memopv2i64, i128mem, 1, SSEPackedInt,
WriteBlend, BlendCommuteImm8>;
SchedWriteBlend.XMM, BlendCommuteImm8>;
// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
@@ -6135,28 +6135,28 @@ let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
loadv2f64, int_x86_sse41_blendvpd,
WriteFVarBlend>;
SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
loadv4f64, int_x86_avx_blendv_pd_256,
WriteFVarBlendY>, VEX_L;
SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
loadv4f32, int_x86_sse41_blendvps,
WriteFVarBlend>;
SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
loadv8f32, int_x86_avx_blendv_ps_256,
WriteFVarBlendY>, VEX_L;
SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
loadv2i64, int_x86_sse41_pblendvb,
WriteVarBlend>;
SchedWriteVarBlend.XMM>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
loadv4i64, int_x86_avx2_pblendvb,
WriteVarBlend>, VEX_L;
SchedWriteVarBlend.YMM>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -6265,12 +6265,12 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
int_x86_sse41_blendvpd, WriteFVarBlend>;
int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
int_x86_sse41_blendvps, WriteFVarBlend>;
int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
int_x86_sse41_pblendvb, WriteVarBlend>;
int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
@@ -7120,18 +7120,18 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
loadv2i64, v4f32, v4i32, WriteFShuffle,
WriteFVarShuffle>;
SchedWriteFVarShuffle.XMM>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
loadv4i64, v8f32, v8i32, WriteFShuffle,
WriteFVarShuffleY>, VEX_L;
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
loadv2i64, v2f64, v2i64, WriteFShuffle,
WriteFVarShuffle>;
SchedWriteFVarShuffle.XMM>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
loadv4i64, v4f64, v4i64, WriteFShuffle,
WriteFVarShuffleY>, VEX_L;
SchedWriteFVarShuffle.YMM>, VEX_L;
}
//===----------------------------------------------------------------------===//
@@ -7307,11 +7307,12 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
(commuteXForm imm:$src3))>;
}
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32, WriteBlend,
VR128, loadv2i64, i128mem, BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, WriteBlend,
VR256, loadv4i64, i256mem, BlendCommuteImm8>,
VEX_L;
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
SchedWriteBlend.XMM, VR128, loadv2i64, i128mem,
BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
SchedWriteBlend.YMM, VR256, loadv4i64, i256mem,
BlendCommuteImm8>, VEX_L;
// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.

File: lib/Target/X86/X86Schedule.td

@@ -38,6 +38,18 @@ multiclass X86SchedWritePair {
}
}
// Multiclass that wraps X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
X86FoldableSchedWrite s128,
X86FoldableSchedWrite s256,
X86FoldableSchedWrite s512> {
X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
X86FoldableSchedWrite MMX = sScl; // MMX operations.
X86FoldableSchedWrite XMM = s128; // XMM operations.
X86FoldableSchedWrite YMM = s256; // YMM operations.
X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
@@ -185,6 +197,57 @@ def WriteFence : SchedWrite;
// Nop, not very useful expect it provides a model for nops!
def WriteNop : SchedWrite;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAdd, WriteFAdd, WriteFAdd>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmp, WriteFCmp>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDiv, WriteFDiv>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffle, WriteFShuffle>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleY>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendY>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
WriteVecLogic, WriteVecLogic>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShift,
WriteVecShift, WriteVecShift>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMul,
WriteVecIMul, WriteVecIMul>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLD, WritePMULLD>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffle,
WriteShuffle, WriteShuffle>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffle,
WriteVarShuffle, WriteVarShuffle>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlend, WriteBlend>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlend, WriteVarBlend>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.