forked from OSchip/llvm-project
Comment parsing: resolve more named character references
This reimplements r173850 with a better approach: (1) use a TableGen-generated matcher instead of doing a linear search; (2) avoid allocations for new strings by converting code points to string literals with TableGen. llvm-svn: 173931
This commit is contained in:
parent
32832e6176
commit
28800da1b3
|
@ -33,6 +33,10 @@ clang_tablegen(CommentHTMLTagsProperties.inc -gen-clang-comment-html-tags-proper
|
|||
SOURCE CommentHTMLTags.td
|
||||
TARGET ClangCommentHTMLTagsProperties)
|
||||
|
||||
clang_tablegen(CommentHTMLNamedCharacterReferences.inc -gen-clang-comment-html-named-character-references
|
||||
SOURCE CommentHTMLNamedCharacterReferences.td
|
||||
TARGET ClangCommentHTMLNamedCharacterReferences)
|
||||
|
||||
clang_tablegen(CommentCommandInfo.inc -gen-clang-comment-command-info
|
||||
SOURCE CommentCommands.td
|
||||
TARGET ClangCommentCommandInfo)
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
// HTML Named Character Reference
|
||||
// Describes one HTML named character reference: a name paired with the
// Unicode code point it stands for. Each anonymous `def : NCR<...>` below
// instantiates this class once.
class NCR<string spelling, int codePoint> {
|
||||
// Name of the reference as listed in the defs below (e.g. "copy").
string Spelling = spelling;
|
||||
// Unicode code point the name resolves to (e.g. 0x000A9 for "copy").
int CodePoint = codePoint;
|
||||
}
|
||||
|
||||
// The list below includes named character references supported by Doxygen:
|
||||
// http://www.stack.nl/~dimitri/doxygen/manual/htmlcmds.html
|
||||
//
|
||||
// It does not include all HTML 5 named character references.
|
||||
//
|
||||
// Corresponding code point values can be found here:
|
||||
// http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html
|
||||
|
||||
def : NCR<"copy", 0x000A9>;
|
||||
def : NCR<"COPY", 0x000A9>;
|
||||
def : NCR<"trade", 0x02122>;
|
||||
def : NCR<"TRADE", 0x02122>;
|
||||
def : NCR<"reg", 0x000AE>;
|
||||
def : NCR<"REG", 0x000AE>;
|
||||
def : NCR<"lt", 0x0003C>;
|
||||
def : NCR<"Lt", 0x0226A>; // MUCH LESS-THAN in HTML5; only "lt"/"LT" are U+003C.
|
||||
def : NCR<"LT", 0x0003C>;
|
||||
def : NCR<"gt", 0x0003E>;
|
||||
def : NCR<"Gt", 0x0226B>; // MUCH GREATER-THAN in HTML5; only "gt"/"GT" are U+003E.
|
||||
def : NCR<"GT", 0x0003E>;
|
||||
def : NCR<"amp", 0x00026>;
|
||||
def : NCR<"AMP", 0x00026>;
|
||||
def : NCR<"apos", 0x00027>;
|
||||
def : NCR<"quot", 0x00022>;
|
||||
def : NCR<"QUOT", 0x00022>;
|
||||
def : NCR<"lsquo", 0x02018>;
|
||||
def : NCR<"rsquo", 0x02019>;
|
||||
def : NCR<"ldquo", 0x0201C>;
|
||||
def : NCR<"rdquo", 0x0201D>;
|
||||
def : NCR<"ndash", 0x02013>;
|
||||
def : NCR<"mdash", 0x02014>;
|
||||
|
||||
def : NCR<"Auml", 0x000C4>;
|
||||
def : NCR<"Euml", 0x000CB>;
|
||||
def : NCR<"Iuml", 0x000CF>;
|
||||
def : NCR<"Ouml", 0x000D6>;
|
||||
def : NCR<"Uuml", 0x000DC>;
|
||||
def : NCR<"Yuml", 0x00178>;
|
||||
def : NCR<"auml", 0x000E4>;
|
||||
def : NCR<"euml", 0x000EB>;
|
||||
def : NCR<"iuml", 0x000EF>;
|
||||
def : NCR<"ouml", 0x000F6>;
|
||||
def : NCR<"uuml", 0x000FC>;
|
||||
def : NCR<"yuml", 0x000FF>;
|
||||
|
||||
def : NCR<"Aacute", 0x000C1>;
|
||||
def : NCR<"Eacute", 0x000C9>;
|
||||
def : NCR<"Iacute", 0x000CD>;
|
||||
def : NCR<"Oacute", 0x000D3>;
|
||||
def : NCR<"Uacute", 0x000DA>;
|
||||
def : NCR<"Yacute", 0x000DD>;
|
||||
def : NCR<"aacute", 0x000E1>;
|
||||
def : NCR<"eacute", 0x000E9>;
|
||||
def : NCR<"iacute", 0x000ED>;
|
||||
def : NCR<"oacute", 0x000F3>;
|
||||
def : NCR<"uacute", 0x000FA>;
|
||||
def : NCR<"yacute", 0x000FD>;
|
||||
|
||||
def : NCR<"Agrave", 0x000C0>;
|
||||
def : NCR<"Egrave", 0x000C8>;
|
||||
def : NCR<"Igrave", 0x000CC>;
|
||||
def : NCR<"Ograve", 0x000D2>;
|
||||
def : NCR<"Ugrave", 0x000D9>;
|
||||
// def : NCR<"Ygrave", 0x01EF2>; // Defined neither in Doxygen, nor in HTML5.
|
||||
def : NCR<"agrave", 0x000E0>;
|
||||
def : NCR<"egrave", 0x000E8>;
|
||||
def : NCR<"igrave", 0x000EC>;
|
||||
def : NCR<"ograve", 0x000F2>;
|
||||
def : NCR<"ugrave", 0x000F9>;
|
||||
def : NCR<"ygrave", 0x01EF3>; // Defined in Doxygen, not defined in HTML5.
|
||||
|
||||
def : NCR<"Acirc", 0x000C2>;
|
||||
def : NCR<"Ecirc", 0x000CA>;
|
||||
def : NCR<"Icirc", 0x000CE>;
|
||||
def : NCR<"Ocirc", 0x000D4>;
|
||||
def : NCR<"Ucirc", 0x000DB>;
|
||||
def : NCR<"Ycirc", 0x00176>; // Not defined in Doxygen, defined in HTML5.
|
||||
def : NCR<"acirc", 0x000E2>;
|
||||
def : NCR<"ecirc", 0x000EA>;
|
||||
def : NCR<"icirc", 0x000EE>;
|
||||
def : NCR<"ocirc", 0x000F4>;
|
||||
def : NCR<"ucirc", 0x000FB>;
|
||||
def : NCR<"ycirc", 0x00177>;
|
||||
|
||||
def : NCR<"Atilde", 0x000C3>;
|
||||
def : NCR<"Ntilde", 0x000D1>;
|
||||
def : NCR<"Otilde", 0x000D5>;
|
||||
def : NCR<"atilde", 0x000E3>;
|
||||
def : NCR<"ntilde", 0x000F1>;
|
||||
def : NCR<"otilde", 0x000F5>;
|
||||
|
||||
def : NCR<"szlig", 0x000DF>;
|
||||
|
||||
def : NCR<"ccedil", 0x000E7>;
|
||||
def : NCR<"Ccedil", 0x000C7>;
|
||||
|
||||
def : NCR<"aring", 0x000E5>;
|
||||
def : NCR<"Aring", 0x000C5>;
|
||||
|
||||
def : NCR<"nbsp", 0x000A0>;
|
||||
|
||||
def : NCR<"Gamma", 0x00393>;
|
||||
def : NCR<"Delta", 0x00394>;
|
||||
def : NCR<"Theta", 0x00398>;
|
||||
def : NCR<"Lambda", 0x0039B>;
|
||||
def : NCR<"Xi", 0x0039E>;
|
||||
def : NCR<"Pi", 0x003A0>;
|
||||
def : NCR<"Sigma", 0x003A3>;
|
||||
def : NCR<"Upsilon", 0x003A5>;
|
||||
def : NCR<"Phi", 0x003A6>;
|
||||
def : NCR<"Psi", 0x003A8>;
|
||||
def : NCR<"Omega", 0x003A9>;
|
||||
|
||||
def : NCR<"alpha", 0x003B1>;
|
||||
def : NCR<"beta", 0x003B2>;
|
||||
def : NCR<"gamma", 0x003B3>;
|
||||
def : NCR<"delta", 0x003B4>;
|
||||
def : NCR<"epsilon", 0x003B5>;
|
||||
def : NCR<"zeta", 0x003B6>;
|
||||
def : NCR<"eta", 0x003B7>;
|
||||
def : NCR<"theta", 0x003B8>;
|
||||
def : NCR<"iota", 0x003B9>;
|
||||
def : NCR<"kappa", 0x003BA>;
|
||||
def : NCR<"lambda", 0x003BB>;
|
||||
def : NCR<"mu", 0x003BC>;
|
||||
def : NCR<"nu", 0x003BD>;
|
||||
def : NCR<"xi", 0x003BE>;
|
||||
def : NCR<"pi", 0x003C0>;
|
||||
def : NCR<"rho", 0x003C1>;
|
||||
def : NCR<"sigma", 0x003C3>;
|
||||
def : NCR<"tau", 0x003C4>;
|
||||
def : NCR<"upsilon", 0x003C5>;
|
||||
def : NCR<"phi", 0x003C6>;
|
||||
def : NCR<"chi", 0x003C7>;
|
||||
def : NCR<"psi", 0x003C8>;
|
||||
def : NCR<"omega", 0x003C9>;
|
||||
def : NCR<"sigmaf", 0x003C2>;
|
||||
|
||||
def : NCR<"sect", 0x000A7>;
|
||||
def : NCR<"deg", 0x000B0>;
|
||||
def : NCR<"prime", 0x02032>;
|
||||
def : NCR<"Prime", 0x02033>;
|
||||
def : NCR<"infin", 0x0221E>;
|
||||
def : NCR<"empty", 0x02205>;
|
||||
def : NCR<"plusmn", 0x000B1>;
|
||||
def : NCR<"times", 0x000D7>;
|
||||
def : NCR<"minus", 0x02212>;
|
||||
def : NCR<"sdot", 0x022C5>;
|
||||
def : NCR<"part", 0x02202>;
|
||||
def : NCR<"nabla", 0x02207>;
|
||||
def : NCR<"radic", 0x0221A>;
|
||||
def : NCR<"perp", 0x022A5>;
|
||||
def : NCR<"sum", 0x02211>;
|
||||
def : NCR<"int", 0x0222B>;
|
||||
def : NCR<"prod", 0x0220F>;
|
||||
def : NCR<"sim", 0x0223C>;
|
||||
def : NCR<"asymp", 0x02248>;
|
||||
def : NCR<"ne", 0x02260>;
|
||||
def : NCR<"equiv", 0x02261>;
|
||||
def : NCR<"prop", 0x0221D>;
|
||||
def : NCR<"le", 0x02264>;
|
||||
def : NCR<"ge", 0x02265>;
|
||||
def : NCR<"larr", 0x02190>;
|
||||
def : NCR<"rarr", 0x02192>;
|
||||
def : NCR<"isin", 0x02208>;
|
||||
def : NCR<"notin", 0x02209>;
|
||||
def : NCR<"lceil", 0x02308>;
|
||||
def : NCR<"rceil", 0x02309>;
|
||||
def : NCR<"lfloor", 0x0230A>;
|
||||
def : NCR<"rfloor", 0x0230B>;
|
||||
|
|
@ -282,18 +282,11 @@ private:
|
|||
/// it stands for (e.g., "<").
|
||||
StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
|
||||
|
||||
/// Given a Doxygen-supported named character reference (e.g., "&trade;"),
|
||||
/// it returns its UTF8 encoding.
|
||||
StringRef HTMLDoxygenCharacterReference(StringRef Name) const;
|
||||
|
||||
/// Given a Unicode codepoint as base-10 integer, return the character.
|
||||
StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
|
||||
|
||||
/// Given a Unicode codepoint as base-16 integer, return the character.
|
||||
StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
|
||||
|
||||
/// Helper routine to do part of the work for resolveHTMLHexCharacterReference.
|
||||
StringRef helperResolveHTMLHexCharacterReference(unsigned CodePoint) const;
|
||||
|
||||
void formTokenWithChars(Token &Result, const char *TokEnd,
|
||||
tok::TokenKind Kind) {
|
||||
|
|
|
@ -3,7 +3,9 @@ TD_SRC_DIR = $(PROJ_SRC_DIR)/../Basic
|
|||
BUILT_SOURCES = Attrs.inc AttrImpl.inc AttrDump.inc \
|
||||
StmtNodes.inc DeclNodes.inc \
|
||||
CommentNodes.inc CommentHTMLTags.inc \
|
||||
CommentHTMLTagsProperties.inc CommentCommandInfo.inc
|
||||
CommentHTMLTagsProperties.inc \
|
||||
CommentHTMLNamedCharacterReferences.inc \
|
||||
CommentCommandInfo.inc
|
||||
|
||||
TABLEGEN_INC_FILES_COMMON = 1
|
||||
|
||||
|
@ -52,6 +54,12 @@ $(ObjDir)/CommentHTMLTagsProperties.inc.tmp : $(PROJ_SRC_DIR)/CommentHTMLTags.td
|
|||
$(Echo) "Building Clang comment HTML tag properties with tblgen"
|
||||
$(Verb) $(ClangTableGen) -gen-clang-comment-html-tags-properties -o $(call SYSPATH, $@) $<
|
||||
|
||||
$(ObjDir)/CommentHTMLNamedCharacterReferences.inc.tmp : \
|
||||
$(PROJ_SRC_DIR)/CommentHTMLNamedCharacterReferences.td \
|
||||
$(CLANG_TBLGEN) $(ObjDir)/.dir
|
||||
$(Echo) "Building Clang named character reference translation function with tblgen"
|
||||
$(Verb) $(ClangTableGen) -gen-clang-comment-html-named-character-references -o $(call SYSPATH, $@) $<
|
||||
|
||||
$(ObjDir)/CommentCommandInfo.inc.tmp : $(PROJ_SRC_DIR)/CommentCommands.td \
|
||||
$(CLANG_TBLGEN) $(ObjDir)/.dir
|
||||
$(Echo) "Building Clang comment command info with tblgen"
|
||||
|
|
|
@ -68,6 +68,7 @@ add_dependencies(clangAST
|
|||
ClangCommentNodes
|
||||
ClangCommentHTMLTags
|
||||
ClangCommentHTMLTagsProperties
|
||||
ClangCommentHTMLNamedCharacterReferences
|
||||
ClangDeclNodes
|
||||
ClangDiagnosticAST
|
||||
ClangDiagnosticComment
|
||||
|
|
|
@ -30,22 +30,8 @@ bool isHTMLHexCharacterReferenceCharacter(char C) {
|
|||
(C >= 'A' && C <= 'F');
|
||||
}
|
||||
|
||||
#include "clang/AST/CommentHTMLTags.inc"
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
static unsigned getCodePoint(StringRef Name) {
|
||||
unsigned CodePoint = 0;
|
||||
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
|
||||
CodePoint *= 16;
|
||||
const char C = Name[i];
|
||||
assert(isHTMLHexCharacterReferenceCharacter(C));
|
||||
CodePoint += llvm::hexDigitValue(C);
|
||||
}
|
||||
return CodePoint;
|
||||
}
|
||||
|
||||
StringRef Lexer::helperResolveHTMLHexCharacterReference(unsigned CodePoint) const {
|
||||
StringRef convertCodePointToUTF8(llvm::BumpPtrAllocator &Allocator,
|
||||
unsigned CodePoint) {
|
||||
char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
|
||||
char *ResolvedPtr = Resolved;
|
||||
if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
|
||||
|
@ -53,164 +39,22 @@ StringRef Lexer::helperResolveHTMLHexCharacterReference(unsigned CodePoint) cons
|
|||
else
|
||||
return StringRef();
|
||||
}
|
||||
|
||||
StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
|
||||
unsigned CodePoint = getCodePoint(Name);
|
||||
return helperResolveHTMLHexCharacterReference(CodePoint);
|
||||
}
|
||||
|
||||
#include "clang/AST/CommentHTMLTags.inc"
|
||||
#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
|
||||
// Fast path, first check a few most widely used named character references.
|
||||
return llvm::StringSwitch<StringRef>(Name)
|
||||
.Case("amp", "&")
|
||||
.Case("lt", "<")
|
||||
.Case("gt", ">")
|
||||
.Case("quot", "\"")
|
||||
.Case("apos", "\'")
|
||||
.Default("");
|
||||
}
|
||||
|
||||
StringRef Lexer::HTMLDoxygenCharacterReference(StringRef Name) const {
|
||||
return llvm::StringSwitch<StringRef>(Name)
|
||||
.Case("copy", helperResolveHTMLHexCharacterReference(0x000A9))
|
||||
.Case("trade", helperResolveHTMLHexCharacterReference(0x02122))
|
||||
.Case("reg", helperResolveHTMLHexCharacterReference(0x000AE))
|
||||
.Case("lt", helperResolveHTMLHexCharacterReference(0x0003C))
|
||||
.Case("gt", helperResolveHTMLHexCharacterReference(0x0003C))
|
||||
.Case("amp", helperResolveHTMLHexCharacterReference(0x00026))
|
||||
.Case("apos", helperResolveHTMLHexCharacterReference(0x00027))
|
||||
.Case("quot", helperResolveHTMLHexCharacterReference(0x00022))
|
||||
.Case("lsquo", helperResolveHTMLHexCharacterReference(0x02018))
|
||||
.Case("rsquo", helperResolveHTMLHexCharacterReference(0x02019))
|
||||
.Case("ldquo", helperResolveHTMLHexCharacterReference(0x0201C))
|
||||
.Case("rdquo", helperResolveHTMLHexCharacterReference(0x0201D))
|
||||
.Case("ndash", helperResolveHTMLHexCharacterReference(0x02013))
|
||||
.Case("mdash", helperResolveHTMLHexCharacterReference(0x02014))
|
||||
.Case("Auml", helperResolveHTMLHexCharacterReference(0x000C4))
|
||||
.Case("Euml", helperResolveHTMLHexCharacterReference(0x000CB))
|
||||
.Case("Iuml", helperResolveHTMLHexCharacterReference(0x000CF))
|
||||
.Case("Ouml", helperResolveHTMLHexCharacterReference(0x000D6))
|
||||
.Case("Uuml", helperResolveHTMLHexCharacterReference(0x000DC))
|
||||
.Case("Yuml", helperResolveHTMLHexCharacterReference(0x00178))
|
||||
.Case("auml", helperResolveHTMLHexCharacterReference(0x000E4))
|
||||
.Case("euml", helperResolveHTMLHexCharacterReference(0x000EB))
|
||||
.Case("iuml", helperResolveHTMLHexCharacterReference(0x000EF))
|
||||
.Case("ouml", helperResolveHTMLHexCharacterReference(0x000F6))
|
||||
.Case("uuml", helperResolveHTMLHexCharacterReference(0x000FC))
|
||||
.Case("yuml", helperResolveHTMLHexCharacterReference(0x000FF))
|
||||
.Case("Aacute", helperResolveHTMLHexCharacterReference(0x000C1))
|
||||
.Case("Eacute", helperResolveHTMLHexCharacterReference(0x000C9))
|
||||
.Case("Iacute", helperResolveHTMLHexCharacterReference(0x000CD))
|
||||
.Case("Oacute", helperResolveHTMLHexCharacterReference(0x000D3))
|
||||
.Case("Uacute", helperResolveHTMLHexCharacterReference(0x000DA))
|
||||
.Case("Yacute", helperResolveHTMLHexCharacterReference(0x000DD))
|
||||
.Case("aacute", helperResolveHTMLHexCharacterReference(0x000E1))
|
||||
.Case("eacute", helperResolveHTMLHexCharacterReference(0x000E9))
|
||||
.Case("iacute", helperResolveHTMLHexCharacterReference(0x000ED))
|
||||
.Case("oacute", helperResolveHTMLHexCharacterReference(0x000F3))
|
||||
.Case("uacute", helperResolveHTMLHexCharacterReference(0x000FA))
|
||||
.Case("yacute", helperResolveHTMLHexCharacterReference(0x000FD))
|
||||
.Case("Agrave", helperResolveHTMLHexCharacterReference(0x000C0))
|
||||
.Case("Egrave", helperResolveHTMLHexCharacterReference(0x000C8))
|
||||
.Case("Igrave", helperResolveHTMLHexCharacterReference(0x000CC))
|
||||
.Case("Ograve", helperResolveHTMLHexCharacterReference(0x000D2))
|
||||
.Case("Ugrave", helperResolveHTMLHexCharacterReference(0x000D9))
|
||||
.Case("agrave", helperResolveHTMLHexCharacterReference(0x000E0))
|
||||
.Case("egrave", helperResolveHTMLHexCharacterReference(0x000E8))
|
||||
.Case("igrave", helperResolveHTMLHexCharacterReference(0x000EC))
|
||||
.Case("ograve", helperResolveHTMLHexCharacterReference(0x000F2))
|
||||
.Case("ugrave", helperResolveHTMLHexCharacterReference(0x000F9))
|
||||
.Case("ygrave", helperResolveHTMLHexCharacterReference(0x01EF3))
|
||||
.Case("Acirc", helperResolveHTMLHexCharacterReference(0x000C2))
|
||||
.Case("Ecirc", helperResolveHTMLHexCharacterReference(0x000CA))
|
||||
.Case("Icirc", helperResolveHTMLHexCharacterReference(0x000CE))
|
||||
.Case("Ocirc", helperResolveHTMLHexCharacterReference(0x000D4))
|
||||
.Case("Ucirc", helperResolveHTMLHexCharacterReference(0x000DB))
|
||||
.Case("acirc", helperResolveHTMLHexCharacterReference(0x000E2))
|
||||
.Case("ecirc", helperResolveHTMLHexCharacterReference(0x000EA))
|
||||
.Case("icirc", helperResolveHTMLHexCharacterReference(0x000EE))
|
||||
.Case("ocirc", helperResolveHTMLHexCharacterReference(0x000F4))
|
||||
.Case("ucirc", helperResolveHTMLHexCharacterReference(0x000FB))
|
||||
.Case("ycirc", helperResolveHTMLHexCharacterReference(0x00177))
|
||||
.Case("Atilde", helperResolveHTMLHexCharacterReference(0x000C3))
|
||||
.Case("Ntilde", helperResolveHTMLHexCharacterReference(0x000D1))
|
||||
.Case("Otilde", helperResolveHTMLHexCharacterReference(0x000D5))
|
||||
.Case("atilde", helperResolveHTMLHexCharacterReference(0x000E3))
|
||||
.Case("ntilde", helperResolveHTMLHexCharacterReference(0x000F1))
|
||||
.Case("otilde", helperResolveHTMLHexCharacterReference(0x000F5))
|
||||
.Case("szlig", helperResolveHTMLHexCharacterReference(0x000DF))
|
||||
.Case("ccedil", helperResolveHTMLHexCharacterReference(0x000E7))
|
||||
.Case("Ccedil", helperResolveHTMLHexCharacterReference(0x000C7))
|
||||
.Case("aring", helperResolveHTMLHexCharacterReference(0x000E5))
|
||||
.Case("Aring", helperResolveHTMLHexCharacterReference(0x000C5))
|
||||
.Case("nbsp", helperResolveHTMLHexCharacterReference(0x000A0))
|
||||
.Case("Gamma", helperResolveHTMLHexCharacterReference(0x00393))
|
||||
.Case("Delta", helperResolveHTMLHexCharacterReference(0x00394))
|
||||
.Case("Theta", helperResolveHTMLHexCharacterReference(0x00398))
|
||||
.Case("Lambda", helperResolveHTMLHexCharacterReference(0x0039B))
|
||||
.Case("Xi", helperResolveHTMLHexCharacterReference(0x0039E))
|
||||
.Case("Pi", helperResolveHTMLHexCharacterReference(0x003A0))
|
||||
.Case("Sigma", helperResolveHTMLHexCharacterReference(0x003A3))
|
||||
.Case("Upsilon", helperResolveHTMLHexCharacterReference(0x003A5))
|
||||
.Case("Phi", helperResolveHTMLHexCharacterReference(0x003A6))
|
||||
.Case("Psi", helperResolveHTMLHexCharacterReference(0x003A8))
|
||||
.Case("Omega", helperResolveHTMLHexCharacterReference(0x003A9))
|
||||
.Case("alpha", helperResolveHTMLHexCharacterReference(0x003B1))
|
||||
.Case("beta", helperResolveHTMLHexCharacterReference(0x003B2))
|
||||
.Case("gamma", helperResolveHTMLHexCharacterReference(0x003B3))
|
||||
.Case("delta", helperResolveHTMLHexCharacterReference(0x003B4))
|
||||
.Case("epsilon", helperResolveHTMLHexCharacterReference(0x003B5))
|
||||
.Case("zeta", helperResolveHTMLHexCharacterReference(0x003B6))
|
||||
.Case("eta", helperResolveHTMLHexCharacterReference(0x003B7))
|
||||
.Case("theta", helperResolveHTMLHexCharacterReference(0x003B8))
|
||||
.Case("iota", helperResolveHTMLHexCharacterReference(0x003B9))
|
||||
.Case("kappa", helperResolveHTMLHexCharacterReference(0x003BA))
|
||||
.Case("lambda", helperResolveHTMLHexCharacterReference(0x003BB))
|
||||
.Case("mu", helperResolveHTMLHexCharacterReference(0x003BC))
|
||||
.Case("nu", helperResolveHTMLHexCharacterReference(0x003BD))
|
||||
.Case("xi", helperResolveHTMLHexCharacterReference(0x003BE))
|
||||
.Case("pi", helperResolveHTMLHexCharacterReference(0x003C0))
|
||||
.Case("rho", helperResolveHTMLHexCharacterReference(0x003C1))
|
||||
.Case("sigma", helperResolveHTMLHexCharacterReference(0x003C3))
|
||||
.Case("tau", helperResolveHTMLHexCharacterReference(0x003C4))
|
||||
.Case("upsilon", helperResolveHTMLHexCharacterReference(0x003C5))
|
||||
.Case("phi", helperResolveHTMLHexCharacterReference(0x003C6))
|
||||
.Case("chi", helperResolveHTMLHexCharacterReference(0x003C7))
|
||||
.Case("psi", helperResolveHTMLHexCharacterReference(0x003C8))
|
||||
.Case("omega", helperResolveHTMLHexCharacterReference(0x003C9))
|
||||
.Case("sigmaf", helperResolveHTMLHexCharacterReference(0x003C2))
|
||||
.Case("sect", helperResolveHTMLHexCharacterReference(0x000A7))
|
||||
.Case("deg", helperResolveHTMLHexCharacterReference(0x000B0))
|
||||
.Case("prime", helperResolveHTMLHexCharacterReference(0x02032))
|
||||
.Case("Prime", helperResolveHTMLHexCharacterReference(0x02033))
|
||||
.Case("infin", helperResolveHTMLHexCharacterReference(0x0221E))
|
||||
.Case("empty", helperResolveHTMLHexCharacterReference(0x02205))
|
||||
.Case("plusmn", helperResolveHTMLHexCharacterReference(0x000B1))
|
||||
.Case("times", helperResolveHTMLHexCharacterReference(0x000D7))
|
||||
.Case("minus", helperResolveHTMLHexCharacterReference(0x02212))
|
||||
.Case("sdot", helperResolveHTMLHexCharacterReference(0x022C5))
|
||||
.Case("part", helperResolveHTMLHexCharacterReference(0x02202))
|
||||
.Case("nabla", helperResolveHTMLHexCharacterReference(0x02207))
|
||||
.Case("radic", helperResolveHTMLHexCharacterReference(0x0221A))
|
||||
.Case("perp", helperResolveHTMLHexCharacterReference(0x022A5))
|
||||
.Case("sum", helperResolveHTMLHexCharacterReference(0x02211))
|
||||
.Case("int", helperResolveHTMLHexCharacterReference(0x0222B))
|
||||
.Case("prod", helperResolveHTMLHexCharacterReference(0x0220F))
|
||||
.Case("sim", helperResolveHTMLHexCharacterReference(0x0223C))
|
||||
.Case("asymp", helperResolveHTMLHexCharacterReference(0x02248))
|
||||
.Case("ne", helperResolveHTMLHexCharacterReference(0x02260))
|
||||
.Case("equiv", helperResolveHTMLHexCharacterReference(0x02261))
|
||||
.Case("prop", helperResolveHTMLHexCharacterReference(0x0221D))
|
||||
.Case("le", helperResolveHTMLHexCharacterReference(0x02264))
|
||||
.Case("ge", helperResolveHTMLHexCharacterReference(0x02265))
|
||||
.Case("larr", helperResolveHTMLHexCharacterReference(0x02190))
|
||||
.Case("rarr", helperResolveHTMLHexCharacterReference(0x02192))
|
||||
.Case("isin", helperResolveHTMLHexCharacterReference(0x02208))
|
||||
.Case("notin", helperResolveHTMLHexCharacterReference(0x02209))
|
||||
.Case("lceil", helperResolveHTMLHexCharacterReference(0x02308))
|
||||
.Case("rceil", helperResolveHTMLHexCharacterReference(0x02309))
|
||||
.Case("lfloor", helperResolveHTMLHexCharacterReference(0x0230A))
|
||||
.Case("rfloor", helperResolveHTMLHexCharacterReference(0x0230B))
|
||||
.Default("");
|
||||
// Slow path.
|
||||
.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
|
||||
}
|
||||
|
||||
StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
|
||||
|
@ -220,13 +64,18 @@ StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
|
|||
CodePoint *= 10;
|
||||
CodePoint += Name[i] - '0';
|
||||
}
|
||||
return convertCodePointToUTF8(Allocator, CodePoint);
|
||||
}
|
||||
|
||||
char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
|
||||
char *ResolvedPtr = Resolved;
|
||||
if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
|
||||
return StringRef(Resolved, ResolvedPtr - Resolved);
|
||||
else
|
||||
return StringRef();
|
||||
StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
|
||||
unsigned CodePoint = 0;
|
||||
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
|
||||
CodePoint *= 16;
|
||||
const char C = Name[i];
|
||||
assert(isHTMLHexCharacterReferenceCharacter(C));
|
||||
CodePoint += llvm::hexDigitValue(C);
|
||||
}
|
||||
return convertCodePointToUTF8(Allocator, CodePoint);
|
||||
}
|
||||
|
||||
void Lexer::skipLineStartingDecorations() {
|
||||
|
@ -725,17 +574,8 @@ void Lexer::lexHTMLCharacterReference(Token &T) {
|
|||
StringRef Name(NamePtr, TokenPtr - NamePtr);
|
||||
TokenPtr++; // Skip semicolon.
|
||||
StringRef Resolved;
|
||||
if (isNamed) {
|
||||
if (isNamed)
|
||||
Resolved = resolveHTMLNamedCharacterReference(Name);
|
||||
if (Resolved.empty()) {
|
||||
Resolved = HTMLDoxygenCharacterReference(Name);
|
||||
if (!Resolved.empty()) {
|
||||
formTokenWithChars(T, TokenPtr, tok::text);
|
||||
T.setText(Resolved);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (isDecimal)
|
||||
Resolved = resolveHTMLDecimalCharacterReference(Name);
|
||||
else
|
||||
|
|
|
@ -4,6 +4,7 @@ add_tablegen(clang-tblgen CLANG
|
|||
ClangASTNodesEmitter.cpp
|
||||
ClangAttrEmitter.cpp
|
||||
ClangCommentCommandInfoEmitter.cpp
|
||||
ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
|
||||
ClangCommentHTMLTagsEmitter.cpp
|
||||
ClangDiagnosticsEmitter.cpp
|
||||
ClangSACheckersEmitter.cpp
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This tablegen backend emits an efficient function to translate HTML named
|
||||
// character references to UTF-8 sequences.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
#include "llvm/TableGen/Error.h"
|
||||
#include "llvm/TableGen/Record.h"
|
||||
#include "llvm/TableGen/StringMatcher.h"
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// \brief Convert a code point to the corresponding UTF-8 sequence represented
|
||||
/// as a C string literal.
|
||||
///
|
||||
/// \returns true on success.
|
||||
static bool translateCodePointToUTF8(unsigned CodePoint,
|
||||
SmallVectorImpl<char> &CLiteral) {
|
||||
char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
|
||||
char *TranslatedPtr = Translated;
|
||||
if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
|
||||
return false;
|
||||
|
||||
StringRef UTF8(Translated, TranslatedPtr - Translated);
|
||||
|
||||
raw_svector_ostream OS(CLiteral);
|
||||
OS << "\"";
|
||||
for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
|
||||
OS << "\\x";
|
||||
OS.write_hex(static_cast<unsigned char>(UTF8[i]));
|
||||
}
|
||||
OS << "\"";
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace clang {
|
||||
void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
|
||||
raw_ostream &OS) {
|
||||
std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
|
||||
std::vector<StringMatcher::StringPair> NameToUTF8;
|
||||
SmallString<32> CLiteral;
|
||||
for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
|
||||
I != E; ++I) {
|
||||
Record &Tag = **I;
|
||||
std::string Spelling = Tag.getValueAsString("Spelling");
|
||||
uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
|
||||
CLiteral.clear();
|
||||
CLiteral.append("return ");
|
||||
if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
|
||||
SrcMgr.PrintMessage(Tag.getLoc().front(),
|
||||
SourceMgr::DK_Error,
|
||||
Twine("invalid code point"));
|
||||
continue;
|
||||
}
|
||||
CLiteral.append(";");
|
||||
|
||||
StringMatcher::StringPair Match(Spelling, CLiteral.str());
|
||||
NameToUTF8.push_back(Match);
|
||||
}
|
||||
|
||||
OS << "// This file is generated by TableGen. Do not edit.\n\n";
|
||||
|
||||
OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
|
||||
" StringRef Name) {\n";
|
||||
StringMatcher("Name", NameToUTF8, OS).Emit();
|
||||
OS << " return StringRef();\n"
|
||||
<< "}\n\n";
|
||||
}
|
||||
|
||||
} // end namespace clang
|
||||
|
|
@ -44,6 +44,7 @@ enum ActionType {
|
|||
GenClangSACheckers,
|
||||
GenClangCommentHTMLTags,
|
||||
GenClangCommentHTMLTagsProperties,
|
||||
GenClangCommentHTMLNamedCharacterReferences,
|
||||
GenClangCommentCommandInfo,
|
||||
GenOptParserDefs, GenOptParserImpl,
|
||||
GenArmNeon,
|
||||
|
@ -111,6 +112,10 @@ namespace {
|
|||
"gen-clang-comment-html-tags-properties",
|
||||
"Generate efficient matchers for HTML tag "
|
||||
"properties"),
|
||||
clEnumValN(GenClangCommentHTMLNamedCharacterReferences,
|
||||
"gen-clang-comment-html-named-character-references",
|
||||
"Generate function to translate named character "
|
||||
"references to UTF-8 sequences"),
|
||||
clEnumValN(GenClangCommentCommandInfo,
|
||||
"gen-clang-comment-command-info",
|
||||
"Generate list of commands that are used in "
|
||||
|
@ -194,6 +199,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
|
|||
case GenClangCommentHTMLTagsProperties:
|
||||
EmitClangCommentHTMLTagsProperties(Records, OS);
|
||||
break;
|
||||
case GenClangCommentHTMLNamedCharacterReferences:
|
||||
EmitClangCommentHTMLNamedCharacterReferences(Records, OS);
|
||||
break;
|
||||
case GenClangCommentCommandInfo:
|
||||
EmitClangCommentCommandInfo(Records, OS);
|
||||
break;
|
||||
|
|
|
@ -51,6 +51,7 @@ void EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS);
|
|||
|
||||
void EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS);
|
||||
void EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, raw_ostream &OS);
|
||||
void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, raw_ostream &OS);
|
||||
|
||||
void EmitClangCommentCommandInfo(RecordKeeper &Records, raw_ostream &OS);
|
||||
|
||||
|
|
Loading…
Reference in New Issue