[RISCV][Clang] Add RVV Vector Indexed Load intrinsic functions.

Support a complex type transformer so that more complex legal types can be defined.

Across our whole downstream implementation, only four instructions need to
use the complex type transformer, so it is not a common case.
I still feel that using a string for prototypes is simple and clear.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D98848
This commit is contained in:
Zakk Chen 2021-03-17 20:25:32 -07:00
parent 54a40606e8
commit 88c2d4c8eb
4 changed files with 12379 additions and 19 deletions

View File

@ -84,6 +84,13 @@
// elements of the same width // elements of the same width
// S: given a vector type, computes its equivalent one for LMUL=1. This is a // S: given a vector type, computes its equivalent one for LMUL=1. This is a
// no-op if the vector was already LMUL=1 // no-op if the vector was already LMUL=1
// (Log2EEW:Value): Value is the log2 of EEW and may be 3/4/5/6 (EEW =
// 8/16/32/64). Given a vector type (SEW and LMUL) and an EEW, computes the
// equivalent integer vector type with that EEW and the corresponding ELMUL
// (elmul = (eew/sew) * lmul). For example, if the vector type is
// __rvv_float16m4_t (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), then the
// equivalent vector type is __rvv_uint8m2_t (elmul = (8/16)*4 = 2). No
// builtin is defined if the equivalent type has an illegal LMUL.
// //
// Following with the example above, if t is "i", then "Ue" will yield unsigned // Following with the example above, if t is "i", then "Ue" will yield unsigned
// int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would // int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would
@ -210,6 +217,12 @@ multiclass RVVBinBuiltinSet<string intrinsic_name, string type_range,
} }
} }
// Type letters iterated by RVVIndexedLoad; each one is passed as the third
// argument of the RVVBuiltin definitions it creates.
defvar TypeList = ["c","s","i","l","f","d"];
// One entry per index element width (EEW). Element 0 is the EEW text spliced
// into the builtin name; element 1 is the matching "(Log2EEW:N)" complex type
// transformer spliced into the prototype string, where 2^N equals the EEW.
defvar EEWList = [["8", "(Log2EEW:3)"],
["16", "(Log2EEW:4)"],
["32", "(Log2EEW:5)"],
["64", "(Log2EEW:6)"]];
class IsFloat<string type> { class IsFloat<string type> {
bit val = !or(!eq(type, "h"), !eq(type, "f"), !eq(type, "d")); bit val = !or(!eq(type, "h"), !eq(type, "f"), !eq(type, "d"));
} }
@ -236,6 +249,31 @@ multiclass RVVVLEBuiltin<list<string> types> {
} }
} }
// Defines the builtins for one indexed-load instruction family named `op`.
//
// For every type letter in TypeList and every [eew, eew_type] pair in EEWList
// this creates a builtin named op # eew # "_v" (e.g. "vluxei8_v") whose
// IRName is `op` and whose masked IRName is op # "_mask".
//
// The index operand is described by eew_type # "Uv": the "(Log2EEW:N)"
// complex type transformer selects an integer vector with the given EEW and
// the corresponding ELMUL, and "U" makes it unsigned.
// NOTE(review): "PCe"/"PCUe" presumably denote a pointer to a const element
// of the base type -- confirm against the transformer documentation.
//
// The manual codegen snippets fill IntrinsicTypes with the result type and
// the types of two operands, and bitcast one operand to a pointer to the
// result type. The masked snippet uses operand indices 1, 2 and 4 where the
// unmasked one uses 0, 1 and 2.
// NOTE(review): presumably the unmasked operands are (pointer, index, vl)
// and the masked ones are (maskedoff, pointer, index, mask, vl) -- confirm.
multiclass RVVIndexedLoad<string op> {
let HasGeneric = false,
ManualCodegen = [{
IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
}],
ManualCodegenMask = [{
IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
// Cross product of element types and index EEWs.
foreach type = TypeList in {
foreach eew_list = EEWList in {
// eew: text used in the builtin name; eew_type: "(Log2EEW:N)" transformer.
defvar eew = eew_list[0];
defvar eew_type = eew_list[1];
let Name = op # eew # "_v", IRName = op, IRNameMask = op # "_mask" in {
// Variant using the base type as-is (first argument "v").
def: RVVBuiltin<"v", "vPCe" # eew_type # "Uv", type>;
// Unsigned variant (first argument "Uv"); only emitted when the type is
// not a floating-point type according to IsFloat.
if !not(IsFloat<type>.val) then {
def: RVVBuiltin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
}
}
}
}
}
}
multiclass RVVVSEBuiltin<list<string> types> { multiclass RVVVSEBuiltin<list<string> types> {
let Name = NAME # "_v", let Name = NAME # "_v",
IRName = "vse", IRName = "vse",
@ -345,6 +383,10 @@ defm vse16: RVVVSEBuiltin<["s"]>;
defm vse32: RVVVSEBuiltin<["i","f"]>; defm vse32: RVVVSEBuiltin<["i","f"]>;
defm vse64: RVVVSEBuiltin<["l","d"]>; defm vse64: RVVVSEBuiltin<["l","d"]>;
// 7.6. Vector Indexed Instructions
// Each anonymous defm expands RVVIndexedLoad for one instruction family,
// producing builtins for index EEW 8/16/32/64 (e.g. vluxei8_v, vloxei64_v).
defm : RVVIndexedLoad<"vluxei">;
defm : RVVIndexedLoad<"vloxei">;
// 12. Vector Integer Arithmetic Instructions // 12. Vector Integer Arithmetic Instructions
// 12.1. Vector Single-Width Integer Add and Subtract // 12.1. Vector Single-Width Integer Add and Subtract
defm vadd : RVVBinBuiltinSet<"vadd", "csil", defm vadd : RVVBinBuiltinSet<"vadd", "csil",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -40,7 +40,8 @@ public:
// Return the C/C++ string representation of LMUL // Return the C/C++ string representation of LMUL
std::string str() const; std::string str() const;
Optional<unsigned> getScale(unsigned ElementBitwidth) const; Optional<unsigned> getScale(unsigned ElementBitwidth) const;
LMULType &operator*=(unsigned RHS); void MulLog2LMUL(int Log2LMUL);
LMULType &operator*=(uint32_t RHS);
}; };
// This class is compact representation of a valid and invalid RVVType. // This class is compact representation of a valid and invalid RVVType.
@ -89,7 +90,13 @@ public:
const std::string &getTypeStr() const { return Str; } const std::string &getTypeStr() const { return Str; }
// Return the short name of a type for C/C++ name suffix. // Return the short name of a type for C/C++ name suffix.
const std::string &getShortStr() const { return ShortStr; } const std::string &getShortStr() {
// Not every type needs a short name, so compute the short name lazily,
// on demand.
if (ShortStr.empty())
initShortStr();
return ShortStr;
}
bool isValid() const { return Valid; } bool isValid() const { return Valid; }
bool isScalar() const { return Scale.hasValue() && Scale.getValue() == 0; } bool isScalar() const { return Scale.hasValue() && Scale.getValue() == 0; }
@ -216,6 +223,8 @@ public:
/// Emit all the information needed to map builtin -> LLVM IR intrinsic. /// Emit all the information needed to map builtin -> LLVM IR intrinsic.
void createCodeGen(raw_ostream &o); void createCodeGen(raw_ostream &o);
std::string getSuffixStr(char Type, int Log2LMUL, StringRef Prototypes);
private: private:
/// Create all intrinsics and add them to \p Out /// Create all intrinsics and add them to \p Out
void createRVVIntrinsics(std::vector<std::unique_ptr<RVVIntrinsic>> &Out); void createRVVIntrinsics(std::vector<std::unique_ptr<RVVIntrinsic>> &Out);
@ -235,6 +244,10 @@ private:
// Emit the architecture preprocessor definitions. Return true when emits // Emit the architecture preprocessor definitions. Return true when emits
// non-empty string. // non-empty string.
bool emitExtDefStr(uint8_t Extensions, raw_ostream &o); bool emitExtDefStr(uint8_t Extensions, raw_ostream &o);
// Slice the Prototypes string into sub-prototype strings and pass each
// sub-prototype string to the Handler individually.
void parsePrototypes(StringRef Prototypes,
std::function<void(StringRef)> Handler);
}; };
} // namespace } // namespace
@ -279,6 +292,8 @@ VScaleVal LMULType::getScale(unsigned ElementBitwidth) const {
return 1 << Log2ScaleResult; return 1 << Log2ScaleResult;
} }
void LMULType::MulLog2LMUL(int log2LMUL) { Log2LMUL += log2LMUL; }
LMULType &LMULType::operator*=(uint32_t RHS) { LMULType &LMULType::operator*=(uint32_t RHS) {
assert(isPowerOf2_32(RHS)); assert(isPowerOf2_32(RHS));
this->Log2LMUL = this->Log2LMUL + Log2_32(RHS); this->Log2LMUL = this->Log2LMUL + Log2_32(RHS);
@ -295,7 +310,6 @@ RVVType::RVVType(BasicType BT, int Log2LMUL, StringRef prototype)
initTypeStr(); initTypeStr();
if (isVector()) { if (isVector()) {
initClangBuiltinStr(); initClangBuiltinStr();
initShortStr();
} }
} }
} }
@ -318,6 +332,8 @@ RVVType::RVVType(BasicType BT, int Log2LMUL, StringRef prototype)
// clang-format on // clang-format on
bool RVVType::verifyType() const { bool RVVType::verifyType() const {
if (ScalarType == Invalid)
return false;
if (isScalar()) if (isScalar())
return true; return true;
if (!Scale.hasValue()) if (!Scale.hasValue())
@ -553,7 +569,8 @@ void RVVType::applyModifier(StringRef Transformer) {
if (Transformer.empty()) if (Transformer.empty())
return; return;
// Handle primitive type transformer // Handle primitive type transformer
switch (Transformer.back()) { auto PType = Transformer.back();
switch (PType) {
case 'e': case 'e':
Scale = 0; Scale = 0;
break; break;
@ -599,7 +616,40 @@ void RVVType::applyModifier(StringRef Transformer) {
} }
Transformer = Transformer.drop_back(); Transformer = Transformer.drop_back();
// Compute type transformers // Extract and compute complex type transformer. It can only appear one time.
if (Transformer.startswith("(")) {
  // The complex transformer is the parenthesised "(Name:Value)" prefix of the
  // remaining transformer string. Malformed input is reported with
  // PrintFatalError rather than assert so that it is also diagnosed when
  // assertions are compiled out.
  size_t Idx = Transformer.find(')');
  if (Idx == StringRef::npos)
    PrintFatalError("Missing ')' in complex type transformer");
  StringRef ComplexType = Transformer.slice(1, Idx);
  Transformer = Transformer.drop_front(Idx + 1);
  if (Transformer.find('(') != StringRef::npos)
    PrintFatalError("Only allow one complex type transformer");

  // Recomputes Scale from the updated LMUL and element width, then rejects
  // combinations the complex transformer does not support: a non-vector
  // primitive type, or any of the P/C/K/W/S transformers following it.
  auto UpdateAndCheckComplexProto = [&]() {
    Scale = LMUL.getScale(ElementBitwidth);
    const StringRef VectorPrototypes("vwqom");
    if (!VectorPrototypes.contains(PType))
      PrintFatalError("Complex type transformer only supports vector type!");
    if (Transformer.find_first_of("PCKWS") != StringRef::npos)
      PrintFatalError(
          "Illegal type transformer for Complex type transformer");
  };

  auto ComplexTT = ComplexType.split(":");
  if (ComplexTT.first == "Log2EEW") {
    uint32_t Log2EEW;
    // getAsInteger returns true on failure; without this check Log2EEW
    // would be read uninitialized below.
    if (ComplexTT.second.getAsInteger(10, Log2EEW))
      PrintFatalError("Invalid Log2EEW value!");
    // Only EEW = 8/16/32/64 is meaningful; this also keeps the shift below
    // well defined.
    if (Log2EEW < 3 || Log2EEW > 6)
      PrintFatalError("Log2EEW must be 3, 4, 5 or 6!");
    // update new elmul = (eew/sew) * lmul
    // The difference may be negative, so compute it in signed arithmetic.
    LMUL.MulLog2LMUL(static_cast<int>(Log2EEW) -
                     static_cast<int>(Log2_32(ElementBitwidth)));
    // update new eew
    ElementBitwidth = 1 << Log2EEW;
    ScalarType = ScalarTypeKind::SignedInteger;
    UpdateAndCheckComplexProto();
  } else {
    PrintFatalError("Illegal complex type transformers!");
  }
}
// Compute the remaining type transformers
for (char I : Transformer) { for (char I : Transformer) {
switch (I) { switch (I) {
case 'P': case 'P':
@ -714,6 +764,7 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
// C type order: mask, op0, op1, ..., // C type order: mask, op0, op1, ...,
std::rotate(CTypeOrder.begin(), CTypeOrder.end() - 1, CTypeOrder.end()); std::rotate(CTypeOrder.begin(), CTypeOrder.end() - 1, CTypeOrder.end());
} }
// IntrinsicTypes is nonmasked version index. Need to update it // IntrinsicTypes is nonmasked version index. Need to update it
// if there is maskedoff operand (It is always in first operand). // if there is maskedoff operand (It is always in first operand).
IntrinsicTypes = NewIntrinsicTypes; IntrinsicTypes = NewIntrinsicTypes;
@ -876,8 +927,8 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
OS << "#endif\n"; OS << "#endif\n";
OS << "#if defined(__riscv_d)\n"; OS << "#if defined(__riscv_d)\n";
for (int ELMul : Log2LMULs) { for (int Log2LMUL : Log2LMULs) {
auto T = computeType('d', ELMul, "v"); auto T = computeType('d', Log2LMUL, "v");
if (T.hasValue()) if (T.hasValue())
printType(T.getValue()); printType(T.getValue());
} }
@ -952,12 +1003,38 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) {
OS << "\n"; OS << "\n";
} }
// Splits Prototypes into consecutive sub-prototypes and calls Handler on each
// one, in order. A sub-prototype consists of optional transformers followed by
// exactly one primitive type character from "evwqom0ztc", which terminates it.
//
// A sub-prototype may start with a parenthesised complex transformer such as
// "(Log2EEW:3)". Its contents are skipped when searching for the terminating
// primitive character because they may contain primitive type characters.
//
// Malformed input is reported with PrintFatalError instead of assert: with
// assertions compiled out, a failed search would yield npos, `npos + 1` would
// wrap to 0, nothing would be consumed and the loop would never terminate.
void RVVEmitter::parsePrototypes(StringRef Prototypes,
                                 std::function<void(StringRef)> Handler) {
  const StringRef Primaries("evwqom0ztc");
  while (!Prototypes.empty()) {
    size_t Idx = 0;
    // Skip over complex prototype because it could contain primitive type
    // character.
    if (Prototypes[0] == '(') {
      Idx = Prototypes.find(')');
      if (Idx == StringRef::npos)
        PrintFatalError("Missing ')' in complex type transformer");
    }
    Idx = Prototypes.find_first_of(Primaries, Idx);
    if (Idx == StringRef::npos)
      PrintFatalError("Prototype does not end with a primitive type");
    Handler(Prototypes.slice(0, Idx + 1));
    Prototypes = Prototypes.drop_front(Idx + 1);
  }
}
std::string RVVEmitter::getSuffixStr(char Type, int Log2LMUL,
StringRef Prototypes) {
SmallVector<std::string> SuffixStrs;
parsePrototypes(Prototypes, [&](StringRef Proto) {
auto T = computeType(Type, Log2LMUL, Proto);
SuffixStrs.push_back(T.getValue()->getShortStr());
});
return join(SuffixStrs, "_");
}
void RVVEmitter::createRVVIntrinsics( void RVVEmitter::createRVVIntrinsics(
std::vector<std::unique_ptr<RVVIntrinsic>> &Out) { std::vector<std::unique_ptr<RVVIntrinsic>> &Out) {
std::vector<Record *> RV = Records.getAllDerivedDefinitions("RVVBuiltin"); std::vector<Record *> RV = Records.getAllDerivedDefinitions("RVVBuiltin");
for (auto *R : RV) { for (auto *R : RV) {
StringRef Name = R->getValueAsString("Name"); StringRef Name = R->getValueAsString("Name");
StringRef Suffix = R->getValueAsString("Suffix"); StringRef SuffixProto = R->getValueAsString("Suffix");
StringRef MangledName = R->getValueAsString("MangledName"); StringRef MangledName = R->getValueAsString("MangledName");
StringRef Prototypes = R->getValueAsString("Prototype"); StringRef Prototypes = R->getValueAsString("Prototype");
StringRef TypeRange = R->getValueAsString("TypeRange"); StringRef TypeRange = R->getValueAsString("TypeRange");
@ -983,17 +1060,13 @@ void RVVEmitter::createRVVIntrinsics(
} }
// Parse prototype and create a list of primitive type with transformers // Parse prototype and create a list of primitive type with transformers
// (operand) in ProtoSeq. ProtoSeq[0] is output operand. // (operand) in ProtoSeq. ProtoSeq[0] is output operand.
SmallVector<std::string, 8> ProtoSeq; SmallVector<std::string> ProtoSeq;
const StringRef Primaries("evwqom0ztc"); parsePrototypes(Prototypes, [&ProtoSeq](StringRef Proto) {
while (!Prototypes.empty()) { ProtoSeq.push_back(Proto.str());
auto Idx = Prototypes.find_first_of(Primaries); });
assert(Idx != StringRef::npos);
ProtoSeq.push_back(Prototypes.slice(0, Idx + 1).str());
Prototypes = Prototypes.drop_front(Idx + 1);
}
// Compute Builtin types // Compute Builtin types
SmallVector<std::string, 8> ProtoMaskSeq = ProtoSeq; SmallVector<std::string> ProtoMaskSeq = ProtoSeq;
if (HasMask) { if (HasMask) {
// If HasMask, append 'm' to last operand. // If HasMask, append 'm' to last operand.
ProtoMaskSeq.push_back("m"); ProtoMaskSeq.push_back("m");
@ -1015,8 +1088,7 @@ void RVVEmitter::createRVVIntrinsics(
if (!Types.hasValue()) if (!Types.hasValue())
continue; continue;
auto SuffixStr = auto SuffixStr = getSuffixStr(I, Log2LMUL, SuffixProto);
computeType(I, Log2LMUL, Suffix).getValue()->getShortStr();
// Create a non-mask intrinsic // Create a non-mask intrinsic
Out.push_back(std::make_unique<RVVIntrinsic>( Out.push_back(std::make_unique<RVVIntrinsic>(
Name, SuffixStr, MangledName, IRName, HasSideEffects, Name, SuffixStr, MangledName, IRName, HasSideEffects,