[X86] Further rearrange the setOperationAction calls to separate the ones that require 512-bit registers OR VLX into separate sections. NFCI

We have several instructions that were introduced in AVX512F that are only available in 512-bit form on KNL. We still make use of them for 128/256-bit vectors by artificially widening to 512 bits and extracting back during isel.
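
As a rough DAG-level illustration of that widening trick (a hand-written sketch, not code from this patch), here is what the NoVLX isel patterns effectively achieve for ISD::ABS on v4i64, where KNL only has the 512-bit VPABSQ. The helper name is made up, and the usual X86ISelLowering.cpp context (SelectionDAG, MVT, ISD) is assumed:

// Sketch only: widen a 256-bit op to 512 bits, where the instruction
// exists, then pull the low half back out.
static SDValue widenV4I64Abs(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  assert(Op.getSimpleValueType() == MVT::v4i64 && "Sketch handles v4i64 only");
  // Insert the 256-bit source into an undef 512-bit vector.
  SDValue Wide = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i64,
                             DAG.getUNDEF(MVT::v8i64), Op.getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
  // Run the operation at 512 bits.
  SDValue WideAbs = DAG.getNode(ISD::ABS, dl, MVT::v8i64, Wide);
  // Extract the low 256 bits as the real result.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i64, WideAbs,
                     DAG.getIntPtrConstant(0, dl));
}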

This commit separates these operations from the true 512-bit operations. This way the normal 512-bit operations can be qualified as needing 512-bit register support, while these special operations get qualified as needing 512-bit registers OR VLX.

The 512-bit register qualification will be introduced in a future patch; this commit just groups everything to minimize the deltas in that patch.

llvm-svn: 320782
Craig Topper 2017-12-15 01:03:43 +00:00
commit 5ebf3ac9c2, parent 07a28f777e
1 changed file with 83 additions and 66 deletions
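
Condensed from the diff below, the resulting grouping has this shape (one representative operation per block; the comments paraphrase the message above, and the tighter 512-bit register predicate is not part of this patch):

// True 512-bit operations: a follow-up patch will qualify these with
// actual 512-bit register support instead of plain hasAVX512().
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
  setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
  // ...
}

// 128/256-bit forms of 512-bit-only instructions: natively legal with
// VLX, otherwise reached by artificially widening to 512 bits.
if (!Subtarget.useSoftFloat() &&
    (Subtarget.hasAVX512() || Subtarget.hasVLX())) {
  setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
  // ...
}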

@@ -1224,13 +1224,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
@@ -1249,16 +1244,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
}
}
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
@@ -1298,11 +1283,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
@@ -1314,19 +1294,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
}
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
MVT::v8i64}) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
@@ -1338,36 +1305,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
MVT::v4i64, MVT::v8i64}) {
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
if (Subtarget.hasDQI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasVPOPCNTDQ()) {
// VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
// version of popcntd/q.
for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
MVT::v4i32, MVT::v2i64})
for (auto VT : { MVT::v16i32, MVT::v8i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MSCATTER, VT, Custom);
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
@@ -1394,6 +1353,59 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}// has AVX-512
if (!Subtarget.useSoftFloat() &&
(Subtarget.hasAVX512() || Subtarget.hasVLX())) {
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
setOperationAction(ISD::MUL, VT, Legal);
}
}
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
@@ -1458,13 +1470,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
// We can custom lower these using 512-bit vectors. If we have VLX,
// they will be made legal later.
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -1492,8 +1497,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasBITALG()) {
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v32i8,
MVT::v16i16, MVT::v16i8, MVT::v8i16 })
for (auto VT : { MVT::v64i8, MVT::v32i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() &&
(Subtarget.hasBWI() || Subtarget.hasVLX())) {
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
if (Subtarget.hasBITALG()) {
for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
@@ -1542,6 +1563,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
if (Subtarget.hasDQI()) {
// TODO: these shouldn't require VLX. We can widen to 512-bit with AVX512F.
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
@@ -1555,11 +1577,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasBWI()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
}
}