forked from OSchip/llvm-project
[x86][inline-asm][AVX512][llvm][PART-2]
Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions. Commit on behalf of mharoush. Extending inline assembly support, compatible with GCC, as follows: the "k" constraint hints the compiler to select any of the AVX512 k0-k7 registers. The "Yk" constraint is a subset of "k" excluding k0, which is not allowed to be used as a mask. Reviewer: 1. rnk Differential Revision: https://reviews.llvm.org/D25062 llvm-svn: 285591
This commit is contained in:
parent
54bfd548aa
commit
68a5c53616
|
@ -32319,6 +32319,7 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||||
case 'Y':
|
case 'Y':
|
||||||
case 'l':
|
case 'l':
|
||||||
return C_RegisterClass;
|
return C_RegisterClass;
|
||||||
|
case 'k': // AVX512 masking registers.
|
||||||
case 'a':
|
case 'a':
|
||||||
case 'b':
|
case 'b':
|
||||||
case 'c':
|
case 'c':
|
||||||
|
@ -32342,6 +32343,19 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (Constraint.size() == 2) {
|
||||||
|
switch (Constraint[0]) {
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
case 'Y':
|
||||||
|
switch (Constraint[1]) {
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
case 'k':
|
||||||
|
return C_Register;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return TargetLowering::getConstraintType(Constraint);
|
return TargetLowering::getConstraintType(Constraint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32385,16 +32399,28 @@ TargetLowering::ConstraintWeight
|
||||||
if (type->isX86_MMXTy() && Subtarget.hasMMX())
|
if (type->isX86_MMXTy() && Subtarget.hasMMX())
|
||||||
weight = CW_SpecificReg;
|
weight = CW_SpecificReg;
|
||||||
break;
|
break;
|
||||||
|
case 'Y':
|
||||||
|
// Other "Y<x>" (e.g. "Yk") constraints should be implemented below.
|
||||||
|
if (constraint[1] == 'k') {
|
||||||
|
// Support for 'Yk' (similarly to the 'k' variant below).
|
||||||
|
weight = CW_SpecificReg;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Else fall through (handle "Y" constraint).
|
||||||
|
LLVM_FALLTHROUGH;
|
||||||
case 'v':
|
case 'v':
|
||||||
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
|
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
|
||||||
weight = CW_Register;
|
weight = CW_Register;
|
||||||
LLVM_FALLTHROUGH;
|
LLVM_FALLTHROUGH;
|
||||||
case 'x':
|
case 'x':
|
||||||
case 'Y':
|
|
||||||
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
|
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
|
||||||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
|
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
|
||||||
weight = CW_Register;
|
weight = CW_Register;
|
||||||
break;
|
break;
|
||||||
|
case 'k':
|
||||||
|
// Enable conditional vector operations using %k<#> registers.
|
||||||
|
weight = CW_SpecificReg;
|
||||||
|
break;
|
||||||
case 'I':
|
case 'I':
|
||||||
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
|
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
|
||||||
if (C->getZExtValue() <= 31)
|
if (C->getZExtValue() <= 31)
|
||||||
|
@ -32671,6 +32697,24 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||||
// TODO: Slight differences here in allocation order and leaving
|
// TODO: Slight differences here in allocation order and leaving
|
||||||
// RIP in the class. Do they matter any more here than they do
|
// RIP in the class. Do they matter any more here than they do
|
||||||
// in the normal allocation?
|
// in the normal allocation?
|
||||||
|
case 'k':
|
||||||
|
if (Subtarget.hasAVX512()) {
|
||||||
|
// Only supported in AVX512 or later.
|
||||||
|
switch (VT.SimpleTy) {
|
||||||
|
default: break;
|
||||||
|
case MVT::i32:
|
||||||
|
return std::make_pair(0U, &X86::VK32RegClass);
|
||||||
|
case MVT::i16:
|
||||||
|
return std::make_pair(0U, &X86::VK16RegClass);
|
||||||
|
case MVT::i8:
|
||||||
|
return std::make_pair(0U, &X86::VK8RegClass);
|
||||||
|
case MVT::i1:
|
||||||
|
return std::make_pair(0U, &X86::VK1RegClass);
|
||||||
|
case MVT::i64:
|
||||||
|
return std::make_pair(0U, &X86::VK64RegClass);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
|
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
|
||||||
if (Subtarget.is64Bit()) {
|
if (Subtarget.is64Bit()) {
|
||||||
if (VT == MVT::i32 || VT == MVT::f32)
|
if (VT == MVT::i32 || VT == MVT::f32)
|
||||||
|
@ -32772,6 +32816,29 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
} else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
|
||||||
|
switch (Constraint[1]) {
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
case 'k':
|
||||||
|
// This register class doesn't allocate k0 for masked vector operation.
|
||||||
|
if (Subtarget.hasAVX512()) { // Only supported in AVX512.
|
||||||
|
switch (VT.SimpleTy) {
|
||||||
|
default: break;
|
||||||
|
case MVT::i32:
|
||||||
|
return std::make_pair(0U, &X86::VK32WMRegClass);
|
||||||
|
case MVT::i16:
|
||||||
|
return std::make_pair(0U, &X86::VK16WMRegClass);
|
||||||
|
case MVT::i8:
|
||||||
|
return std::make_pair(0U, &X86::VK8WMRegClass);
|
||||||
|
case MVT::i1:
|
||||||
|
return std::make_pair(0U, &X86::VK1WMRegClass);
|
||||||
|
case MVT::i64:
|
||||||
|
return std::make_pair(0U, &X86::VK64WMRegClass);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the default implementation in TargetLowering to convert the register
|
// Use the default implementation in TargetLowering to convert the register
|
||||||
|
|
Loading…
Reference in New Issue