Comment parsing: remove HTML attribute validation

Since the community says that a blacklist is not good enough, and I don't have
enough time now to implement a proper whitelist, let's just remove the
attribute validation.

Nevertheless, we can still communicate in the generated XML whether our parser
found an issue with the HTML.  This bit is best-effort, and the schema
specifically calls it out as such.

llvm-svn: 207712
This commit is contained in:
Dmitri Gribenko 2014-04-30 21:54:30 +00:00
parent fee224f942
commit 0b2026de6b
8 changed files with 33 additions and 120 deletions

View File

@ -582,7 +582,9 @@
<element name="rawHTML">
<optional>
<!-- If not specified, the default value is 'false'. -->
<attribute name="isSafeToPassThrough">
<!-- The value 'false' or absence of the attribute does not imply
that the HTML is actually well-formed. -->
<attribute name="isMalformed">
<data type="boolean" />
</attribute>
</optional>

View File

@ -105,9 +105,8 @@ protected:
unsigned : NumInlineContentCommentBits;
/// True if this tag is safe to pass through to HTML output even if the
/// comment comes from an untrusted source.
unsigned IsSafeToPassThrough : 1;
/// True if we found that this tag is malformed in some way.
unsigned IsMalformed : 1;
};
enum { NumHTMLTagCommentBits = NumInlineContentCommentBits + 1 };
@ -388,7 +387,7 @@ protected:
TagName(TagName),
TagNameRange(TagNameBegin, TagNameEnd) {
setLocation(TagNameBegin);
HTMLTagCommentBits.IsSafeToPassThrough = 1;
HTMLTagCommentBits.IsMalformed = 0;
}
public:
@ -405,12 +404,12 @@ public:
L.getLocWithOffset(1 + TagName.size()));
}
bool isSafeToPassThrough() const {
return HTMLTagCommentBits.IsSafeToPassThrough;
bool isMalformed() const {
return HTMLTagCommentBits.IsMalformed;
}
void setUnsafeToPassThrough() {
HTMLTagCommentBits.IsSafeToPassThrough = 0;
void setIsMalformed() {
HTMLTagCommentBits.IsMalformed = 1;
}
};

View File

@ -65,70 +65,3 @@ class EventHandlerContentAttribute<string spelling> : Attribute<spelling> {
let IsSafeToPassThrough = 0;
}
// This list is based on HTML5 draft as of 04 February 2014.
//
// The list is intentionally organized as one item per line to make it easier
// to compare with the HTML spec.
foreach AttrName = [
"onabort",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragexit",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmousedown",
"onmouseenter",
"onmouseleave",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onpause",
"onplay",
"onplaying",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onstalled",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onvolumechange",
"onwaiting"
] in {
def Attr#AttrName : EventHandlerContentAttribute<AttrName>;
}

View File

@ -467,11 +467,6 @@ void Sema::actOnHTMLStartTagFinish(
SourceLocation GreaterLoc,
bool IsSelfClosing) {
Tag->setAttrs(Attrs);
for (const auto &Attr : Attrs) {
if (!isHTMLAttributeSafeToPassThrough(Attr.Name))
Tag->setUnsafeToPassThrough();
}
Tag->setGreaterLoc(GreaterLoc);
if (IsSelfClosing)
Tag->setSelfClosing();
@ -487,7 +482,7 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin,
if (isHTMLEndTagForbidden(TagName)) {
Diag(HET->getLocation(), diag::warn_doc_html_end_forbidden)
<< TagName << HET->getSourceRange();
HET->setUnsafeToPassThrough();
HET->setIsMalformed();
return HET;
}
@ -503,7 +498,7 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin,
if (!FoundOpen) {
Diag(HET->getLocation(), diag::warn_doc_html_end_unbalanced)
<< HET->getSourceRange();
HET->setUnsafeToPassThrough();
HET->setIsMalformed();
return HET;
}
@ -511,9 +506,9 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin,
HTMLStartTagComment *HST = HTMLOpenTags.pop_back_val();
StringRef LastNotClosedTagName = HST->getTagName();
if (LastNotClosedTagName == TagName) {
// If the start tag is unsafe, end tag is unsafe as well.
if (!HST->isSafeToPassThrough())
HET->setUnsafeToPassThrough();
// If the start tag is malformed, end tag is malformed as well.
if (HST->isMalformed())
HET->setIsMalformed();
break;
}
@ -533,14 +528,14 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin,
Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch)
<< HST->getTagName() << HET->getTagName()
<< HST->getSourceRange() << HET->getSourceRange();
HST->setUnsafeToPassThrough();
HST->setIsMalformed();
} else {
Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch)
<< HST->getTagName() << HET->getTagName()
<< HST->getSourceRange();
Diag(HET->getLocation(), diag::note_doc_html_end_tag)
<< HET->getSourceRange();
HST->setUnsafeToPassThrough();
HST->setIsMalformed();
}
}
@ -560,7 +555,7 @@ FullComment *Sema::actOnFullComment(
Diag(HST->getLocation(), diag::warn_doc_html_missing_end_tag)
<< HST->getTagName() << HST->getSourceRange();
HST->setUnsafeToPassThrough();
HST->setIsMalformed();
}
return FC;

View File

@ -669,8 +669,8 @@ void CommentASTToXMLConverter::visitInlineCommandComment(
void CommentASTToXMLConverter::visitHTMLStartTagComment(
const HTMLStartTagComment *C) {
Result << "<rawHTML";
if (C->isSafeToPassThrough())
Result << " isSafeToPassThrough=\"1\"";
if (C->isMalformed())
Result << " isMalformed=\"1\"";
Result << ">";
{
SmallString<32> Tag;
@ -686,8 +686,8 @@ void CommentASTToXMLConverter::visitHTMLStartTagComment(
void
CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) {
Result << "<rawHTML";
if (C->isSafeToPassThrough())
Result << " isSafeToPassThrough=\"1\"";
if (C->isMalformed())
Result << " isMalformed=\"1\"";
Result << ">&lt;/" << C->getTagName() << "&gt;</rawHTML>";
}

View File

@ -7,8 +7,8 @@
<monospaced>ccc</monospaced>
<emphasized>ddd</emphasized>
<rawHTML>&lt;eee&gt;</rawHTML>
<rawHTML isSafeToPassThrough="0">&lt;fff&gt;</rawHTML>
<rawHTML isSafeToPassThrough="1">&lt;ggg&gt;</rawHTML>.
<rawHTML isMalformed="0">&lt;fff&gt;</rawHTML>
<rawHTML isMalformed="1">&lt;ggg&gt;</rawHTML>.
</Para>
</Abstract>
</Function>

View File

@ -472,7 +472,7 @@ void test_full_comment_1(int x1, int x2);
/// <br><a href="http://example.com/">Aaa</a>
void comment_to_html_conversion_24();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <br><a href="http://example.com/">Aaa</a></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_24</Name><USR>c:@F@comment_to_html_conversion_24#</USR><Declaration>void comment_to_html_conversion_24()</Declaration><Abstract><Para> <rawHTML isSafeToPassThrough="1"><![CDATA[<br>]]></rawHTML><rawHTML isSafeToPassThrough="1"><![CDATA[<a href="http://example.com/">]]></rawHTML>Aaa<rawHTML isSafeToPassThrough="1">&lt;/a&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <br><a href="http://example.com/">Aaa</a></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_24</Name><USR>c:@F@comment_to_html_conversion_24#</USR><Declaration>void comment_to_html_conversion_24()</Declaration><Abstract><Para> <rawHTML><![CDATA[<br>]]></rawHTML><rawHTML><![CDATA[<a href="http://example.com/">]]></rawHTML>Aaa<rawHTML>&lt;/a&gt;</rawHTML></Para></Abstract></Function>]
// CHECK-NEXT: CommentAST=[
// CHECK-NEXT: (CXComment_FullComment
// CHECK-NEXT: (CXComment_Paragraph
@ -678,7 +678,7 @@ void comment_to_html_conversion_33();
/// <em>0&lt;i</em>
void comment_to_html_conversion_34();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_34:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <em>0&lt;i</em></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_34</Name><USR>c:@F@comment_to_html_conversion_34#</USR><Declaration>void comment_to_html_conversion_34()</Declaration><Abstract><Para> <rawHTML isSafeToPassThrough="1"><![CDATA[<em>]]></rawHTML>0&lt;i<rawHTML isSafeToPassThrough="1">&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_34:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <em>0&lt;i</em></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_34</Name><USR>c:@F@comment_to_html_conversion_34#</USR><Declaration>void comment_to_html_conversion_34()</Declaration><Abstract><Para> <rawHTML><![CDATA[<em>]]></rawHTML>0&lt;i<rawHTML>&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
// CHECK-NEXT: CommentAST=[
// CHECK-NEXT: (CXComment_FullComment
// CHECK-NEXT: (CXComment_Paragraph
@ -724,7 +724,7 @@ void comment_to_html_conversion_35();
/// <h1 id="]]>">Aaa</h1>
void comment_to_html_conversion_36();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_36:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <h1 id="]]>">Aaa</h1></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_36</Name><USR>c:@F@comment_to_html_conversion_36#</USR><Declaration>void comment_to_html_conversion_36()</Declaration><Abstract><Para> <rawHTML isSafeToPassThrough="1"><![CDATA[<h1 id="]]]]><![CDATA[>">]]></rawHTML>Aaa<rawHTML isSafeToPassThrough="1">&lt;/h1&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_36:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <h1 id="]]>">Aaa</h1></p>] FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-2]]" column="6"><Name>comment_to_html_conversion_36</Name><USR>c:@F@comment_to_html_conversion_36#</USR><Declaration>void comment_to_html_conversion_36()</Declaration><Abstract><Para> <rawHTML><![CDATA[<h1 id="]]]]><![CDATA[>">]]></rawHTML>Aaa<rawHTML>&lt;/h1&gt;</rawHTML></Para></Abstract></Function>]
// CHECK-NEXT: CommentAST=[
// CHECK-NEXT: (CXComment_FullComment
// CHECK-NEXT: (CXComment_Paragraph
@ -867,27 +867,27 @@ enum class comment_to_xml_conversion_17 {
/// <a href="http://example.org/">
void comment_to_xml_conversion_unsafe_html_01();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_01:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_01</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_01#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_01()</Declaration><Abstract><Para> <rawHTML><![CDATA[<a href="http://example.org/">]]></rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_01:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_01</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_01#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_01()</Declaration><Abstract><Para> <rawHTML isMalformed="1"><![CDATA[<a href="http://example.org/">]]></rawHTML></Para></Abstract></Function>]
/// <a href="http://example.org/"><em>Aaa</em>
void comment_to_xml_conversion_unsafe_html_02();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_02:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_02</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_02#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_02()</Declaration><Abstract><Para> <rawHTML><![CDATA[<a href="http://example.org/">]]></rawHTML><rawHTML isSafeToPassThrough="1"><![CDATA[<em>]]></rawHTML>Aaa<rawHTML isSafeToPassThrough="1">&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_02:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_02</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_02#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_02()</Declaration><Abstract><Para> <rawHTML isMalformed="1"><![CDATA[<a href="http://example.org/">]]></rawHTML><rawHTML><![CDATA[<em>]]></rawHTML>Aaa<rawHTML>&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
/// <em>Aaa
void comment_to_xml_conversion_unsafe_html_03();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_03:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_03</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_03#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_03()</Declaration><Abstract><Para> <rawHTML><![CDATA[<em>]]></rawHTML>Aaa</Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_03:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_03</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_03#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_03()</Declaration><Abstract><Para> <rawHTML isMalformed="1"><![CDATA[<em>]]></rawHTML>Aaa</Para></Abstract></Function>]
/// <em>Aaa</b></em>
void comment_to_xml_conversion_unsafe_html_04();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_04:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_04</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_04#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_04()</Declaration><Abstract><Para> <rawHTML isSafeToPassThrough="1"><![CDATA[<em>]]></rawHTML>Aaa<rawHTML>&lt;/b&gt;</rawHTML><rawHTML isSafeToPassThrough="1">&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_04:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_04</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_04#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_04()</Declaration><Abstract><Para> <rawHTML><![CDATA[<em>]]></rawHTML>Aaa<rawHTML isMalformed="1">&lt;/b&gt;</rawHTML><rawHTML>&lt;/em&gt;</rawHTML></Para></Abstract></Function>]
/// <em>Aaa</em></b>
void comment_to_xml_conversion_unsafe_html_05();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_05:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_05</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_05#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_05()</Declaration><Abstract><Para> <rawHTML isSafeToPassThrough="1"><![CDATA[<em>]]></rawHTML>Aaa<rawHTML isSafeToPassThrough="1">&lt;/em&gt;</rawHTML><rawHTML>&lt;/b&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_05:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_05</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_05#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_05()</Declaration><Abstract><Para> <rawHTML><![CDATA[<em>]]></rawHTML>Aaa<rawHTML>&lt;/em&gt;</rawHTML><rawHTML isMalformed="1">&lt;/b&gt;</rawHTML></Para></Abstract></Function>]
/// </table>
void comment_to_xml_conversion_unsafe_html_06();
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_06:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_06</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_06#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_06()</Declaration><Abstract><Para> <rawHTML>&lt;/table&gt;</rawHTML></Para></Abstract></Function>]
// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_06:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_unsafe_html_06</Name><USR>c:@F@comment_to_xml_conversion_unsafe_html_06#</USR><Declaration>void comment_to_xml_conversion_unsafe_html_06()</Declaration><Abstract><Para> <rawHTML isMalformed="1">&lt;/table&gt;</rawHTML></Para></Abstract></Function>]
/// <div onclick="alert('meow');">Aaa</div>
void comment_to_xml_conversion_unsafe_html_07();

View File

@ -61,21 +61,5 @@ void clang::EmitClangCommentHTMLTagsProperties(RecordKeeper &Records,
StringMatcher("Name", MatchesEndTagForbidden, OS).Emit();
OS << " return false;\n"
<< "}\n\n";
std::vector<Record *> Attributes =
Records.getAllDerivedDefinitions("Attribute");
std::vector<StringMatcher::StringPair> Matches;
for (Record *Attribute : Attributes) {
std::string Spelling = Attribute->getValueAsString("Spelling");
if (!Attribute->getValueAsBit("IsSafeToPassThrough"))
Matches.push_back(StringMatcher::StringPair(Spelling, "return false;"));
}
emitSourceFileHeader("HTML attribute name matcher", OS);
OS << "bool isHTMLAttributeSafeToPassThrough(StringRef Name) {\n";
StringMatcher("Name", Matches, OS).Emit();
OS << " return true;\n"
<< "}\n\n";
}