[libc++] Implements multiline regex support.

This resolves LWG2503.
This commit is contained in:
Mark de Wever 2020-11-18 18:09:13 +01:00
parent 0016ab6f36
commit 3abaf6cde7
4 changed files with 334 additions and 15 deletions

View File

@ -32,7 +32,8 @@ enum syntax_option_type
extended = unspecified,
awk = unspecified,
grep = unspecified,
egrep = unspecified
egrep = unspecified,
multiline = unspecified
};
constexpr syntax_option_type operator~(syntax_option_type f);
@ -142,6 +143,7 @@ public:
static constexpr regex_constants::syntax_option_type awk = regex_constants::awk;
static constexpr regex_constants::syntax_option_type grep = regex_constants::grep;
static constexpr regex_constants::syntax_option_type egrep = regex_constants::egrep;
static constexpr regex_constants::syntax_option_type multiline = regex_constants::multiline;
// construct/copy/destroy:
basic_regex();
@ -802,7 +804,9 @@ enum syntax_option_type
extended = 1 << 5,
awk = 1 << 6,
grep = 1 << 7,
egrep = 1 << 8
egrep = 1 << 8,
// 1 << 9 may be used by ECMAScript
multiline = 1 << 10
};
inline _LIBCPP_CONSTEXPR
@ -1982,24 +1986,33 @@ __word_boundary<_CharT, _Traits>::__exec(__state& __s) const
// __l_anchor
template <class _CharT>
class __l_anchor
// Returns true when __c is a carriage return ('\r') or a line feed ('\n'),
// the two characters the multiline anchors treat as a line boundary.
// The parameter carries a reserved (double-underscore) name: identifiers in
// a standard library header must not collide with user-defined macros, and a
// plain `c` could be redefined by user code before <regex> is included.
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
bool __is_eol(_CharT __c)
{
    return __c == '\r' || __c == '\n';
}
template <class _CharT>
class __l_anchor_multiline
: public __owns_one_state<_CharT>
{
typedef __owns_one_state<_CharT> base;
bool __multiline;
public:
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
__l_anchor(__node<_CharT>* __s)
: base(__s) {}
__l_anchor_multiline(bool __multiline, __node<_CharT>* __s)
: base(__s), __multiline(__multiline) {}
virtual void __exec(__state&) const;
};
template <class _CharT>
void
__l_anchor<_CharT>::__exec(__state& __s) const
__l_anchor_multiline<_CharT>::__exec(__state& __s) const
{
if (__s.__at_first_ && __s.__current_ == __s.__first_ &&
!(__s.__flags_ & regex_constants::match_not_bol))
@ -2007,6 +2020,13 @@ __l_anchor<_CharT>::__exec(__state& __s) const
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
else if (__multiline &&
!__s.__at_first_ &&
__is_eol(*_VSTD::prev(__s.__current_)))
{
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
else
{
__s.__do_ = __state::__reject;
@ -2017,24 +2037,26 @@ __l_anchor<_CharT>::__exec(__state& __s) const
// __r_anchor
template <class _CharT>
class __r_anchor
class __r_anchor_multiline
: public __owns_one_state<_CharT>
{
typedef __owns_one_state<_CharT> base;
bool __multiline;
public:
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
__r_anchor(__node<_CharT>* __s)
: base(__s) {}
__r_anchor_multiline(bool __multiline, __node<_CharT>* __s)
: base(__s), __multiline(__multiline) {}
virtual void __exec(__state&) const;
};
template <class _CharT>
void
__r_anchor<_CharT>::__exec(__state& __s) const
__r_anchor_multiline<_CharT>::__exec(__state& __s) const
{
if (__s.__current_ == __s.__last_ &&
!(__s.__flags_ & regex_constants::match_not_eol))
@ -2042,6 +2064,11 @@ __r_anchor<_CharT>::__exec(__state& __s) const
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
else if (__multiline && __is_eol(*__s.__current_))
{
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
else
{
__s.__do_ = __state::__reject;
@ -2541,6 +2568,7 @@ public:
static const regex_constants::syntax_option_type awk = regex_constants::awk;
static const regex_constants::syntax_option_type grep = regex_constants::grep;
static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
static const regex_constants::syntax_option_type multiline = regex_constants::multiline;
// construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY
@ -2707,6 +2735,12 @@ private:
_LIBCPP_INLINE_VISIBILITY
unsigned __loop_count() const {return __loop_count_;}
// Reports whether the anchors should be built in multiline mode: this is
// the case only when the selected grammar is ECMAScript and the multiline
// bit is present in __flags_. Any other grammar yields false regardless of
// the multiline bit.
_LIBCPP_INLINE_VISIBILITY
bool __use_multiline() const
{
    if (__get_grammar(__flags_) != ECMAScript)
        return false;
    return (__flags_ & multiline) != 0;
}
template <class _ForwardIterator>
void
__init(_ForwardIterator __first, _ForwardIterator __last);
@ -4746,7 +4780,7 @@ template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_l_anchor()
{
__end_->first() = new __l_anchor<_CharT>(__end_->first());
__end_->first() = new __l_anchor_multiline<_CharT>(__use_multiline(), __end_->first());
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
@ -4754,7 +4788,7 @@ template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_r_anchor()
{
__end_->first() = new __r_anchor<_CharT>(__end_->first());
__end_->first() = new __r_anchor_multiline<_CharT>(__use_multiline(), __end_->first());
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}

View File

@ -0,0 +1,272 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// UNSUPPORTED: c++98, c++03
// <regex>
// multiline:
// Specifies that ^ shall match the beginning of a line and $ shall match
// the end of a line, if the ECMAScript engine is selected.
#include <regex>
#include <cassert>
#include "test_macros.h"
// Compiles `pat` with the syntax flags `f`, searches `target` with it and
// asserts that the search result equals `expected`.
// When a match is expected, the match results must hold exactly one
// sub-match of length 3 whose text is "foo"; otherwise the match results
// must be empty.
static void search(const char* pat, std::regex_constants::syntax_option_type f,
                   const char* target, bool expected)
{
    std::regex compiled(pat, f);
    std::cmatch results;
    assert(std::regex_search(target, results, compiled) == expected);

    if (!expected) {
        // A failed search leaves no sub-matches behind.
        assert(results.size() == 0);
        return;
    }

    assert(results.size() == 1);
    assert(results.length(0) == 3);
    assert(results.str(0) == "foo");
}
// Drives search() over a table of (pattern, target) pairs. For each pair the
// pattern is first tried with every grammar on its own and then with every
// grammar combined with the multiline flag. Only ECMAScript | multiline is
// expected to let ^ / $ match next to an embedded '\n' or '\r'; every other
// grammar must give the same answer with and without the flag.
int main(int, char**)
{
    using std::regex_constants::ECMAScript;
    using std::regex_constants::basic;
    using std::regex_constants::extended;
    using std::regex_constants::awk;
    using std::regex_constants::grep;
    using std::regex_constants::egrep;
    using std::regex_constants::multiline;

    struct Case
    {
        const char* pat;
        const char* target;
        bool plain;          // expected result without multiline (any grammar)
        bool ecma_multiline; // expected result for ECMAScript | multiline
    };

    // The order (including the two repeated "no line break" entries) mirrors
    // the sequence in which the checks are meant to run.
    const Case cases[] = {
        // line feed as the line terminator
        {"^foo", "foo",        true,  true},
        {"^foo", "\nfoo",      false, true},
        {"^foo", "bar\nfoo",   false, true},
        {"foo$", "foo",        true,  true},
        {"foo$", "foo\n",      false, true},
        {"foo$", "foo\nbar",   false, true},
        // carriage return as the line terminator
        {"^foo", "foo",        true,  true},
        {"^foo", "\rfoo",      false, true},
        {"^foo", "bar\rfoo",   false, true},
        {"foo$", "foo",        true,  true},
        {"foo$", "foo\r",      false, true},
        {"foo$", "foo\rbar",   false, true},
    };

    const std::regex_constants::syntax_option_type grammars[] = {
        ECMAScript, basic, extended, awk, grep, egrep
    };

    for (const Case& c : cases)
    {
        // Without the multiline flag all grammars agree.
        for (std::regex_constants::syntax_option_type g : grammars)
            search(c.pat, g, c.target, c.plain);

        // With the flag, only the ECMAScript grammar changes its answer.
        for (std::regex_constants::syntax_option_type g : grammars)
            search(c.pat, g | multiline, c.target,
                   g == ECMAScript ? c.ecma_multiline : c.plain);
    }

    return 0;
}

View File

@ -23,7 +23,8 @@
// extended = unspecified,
// awk = unspecified,
// grep = unspecified,
// egrep = unspecified
// egrep = unspecified,
// multiline = unspecified
// };
//
// }
@ -48,6 +49,7 @@ int main(int, char**)
assert(std::regex_constants::awk != 0);
assert(std::regex_constants::grep != 0);
assert(std::regex_constants::egrep != 0);
assert(std::regex_constants::multiline != 0);
assert((std::regex_constants::icase & std::regex_constants::nosubs) == 0);
assert((std::regex_constants::icase & std::regex_constants::optimize) == 0);
@ -58,6 +60,7 @@ int main(int, char**)
assert((std::regex_constants::icase & std::regex_constants::awk) == 0);
assert((std::regex_constants::icase & std::regex_constants::grep) == 0);
assert((std::regex_constants::icase & std::regex_constants::egrep) == 0);
assert((std::regex_constants::icase & std::regex_constants::multiline) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::optimize) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::collate) == 0);
@ -67,6 +70,7 @@ int main(int, char**)
assert((std::regex_constants::nosubs & std::regex_constants::awk) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::grep) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::egrep) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::multiline) == 0);
assert((std::regex_constants::optimize & std::regex_constants::collate) == 0);
assert((std::regex_constants::optimize & std::regex_constants::ECMAScript) == 0);
@ -75,6 +79,7 @@ int main(int, char**)
assert((std::regex_constants::optimize & std::regex_constants::awk) == 0);
assert((std::regex_constants::optimize & std::regex_constants::grep) == 0);
assert((std::regex_constants::optimize & std::regex_constants::egrep) == 0);
assert((std::regex_constants::optimize & std::regex_constants::multiline) == 0);
assert((std::regex_constants::collate & std::regex_constants::ECMAScript) == 0);
assert((std::regex_constants::collate & std::regex_constants::basic) == 0);
@ -82,26 +87,34 @@ int main(int, char**)
assert((std::regex_constants::collate & std::regex_constants::awk) == 0);
assert((std::regex_constants::collate & std::regex_constants::grep) == 0);
assert((std::regex_constants::collate & std::regex_constants::egrep) == 0);
assert((std::regex_constants::collate & std::regex_constants::multiline) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::basic) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::extended) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::awk) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::grep) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::egrep) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::multiline) == 0);
assert((std::regex_constants::basic & std::regex_constants::extended) == 0);
assert((std::regex_constants::basic & std::regex_constants::awk) == 0);
assert((std::regex_constants::basic & std::regex_constants::grep) == 0);
assert((std::regex_constants::basic & std::regex_constants::egrep) == 0);
assert((std::regex_constants::basic & std::regex_constants::multiline) == 0);
assert((std::regex_constants::extended & std::regex_constants::awk) == 0);
assert((std::regex_constants::extended & std::regex_constants::grep) == 0);
assert((std::regex_constants::extended & std::regex_constants::egrep) == 0);
assert((std::regex_constants::extended & std::regex_constants::multiline) == 0);
assert((std::regex_constants::awk & std::regex_constants::grep) == 0);
assert((std::regex_constants::awk & std::regex_constants::egrep) == 0);
assert((std::regex_constants::awk & std::regex_constants::multiline) == 0);
assert((std::regex_constants::grep & std::regex_constants::egrep) == 0);
assert((std::regex_constants::grep & std::regex_constants::multiline) == 0);
assert((std::regex_constants::egrep & std::regex_constants::multiline) == 0);
assert((std::regex_constants::icase | std::regex_constants::nosubs) != 0);
assert((std::regex_constants::icase ^ std::regex_constants::nosubs) != 0);

View File

@ -371,7 +371,7 @@
<tr><td><a href="https://wg21.link/LWG2460">2460</a></td><td>LWG issue 2408 and value categories</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2468">2468</a></td><td>Self-move-assignment of library types</td><td>Issaquah</td><td></td></tr>
<tr><td><a href="https://wg21.link/LWG2475">2475</a></td><td>Allow overwriting of std::basic_string terminator with charT() to allow cleaner interoperation with legacy APIs</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td></td></tr>
<tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2510">2510</a></td><td>Tag types should not be DefaultConstructible</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2514">2514</a></td><td>Type traits must not be final</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2518">2518</a></td><td>[fund.ts.v2] Non-member swap for propagate_const should call member swap</td><td>Issaquah</td><td>Complete</td></tr>
@ -503,7 +503,7 @@
<!-- <tr><td></td><td></td><td></td><td></td></tr> -->
</table>
<p>Last Updated: 3-Jul-2019</p>
<p>Last Updated: 17-Nov-2020</p>
</div>
</body>
</html>