forked from OSchip/llvm-project
Bill Fisher: This patch fixes a bug where std::regex in ECMAScript mode ignored capture groups inside lookahead assertions.
For example, matching /(?=(a))(a)/ against "a" should yield two captures: \1 = "a" and \2 = "a".
llvm-svn: 186954
This commit is contained in:
parent
9b3d2c0260
commit
7491a16031
|
@ -2769,7 +2769,7 @@ private:
|
|||
void __push_end_marked_subexpression(unsigned);
|
||||
void __push_empty();
|
||||
void __push_word_boundary(bool);
|
||||
void __push_lookahead(const basic_regex&, bool);
|
||||
void __push_lookahead(const basic_regex&, bool, unsigned);
|
||||
|
||||
template <class _Allocator>
|
||||
bool
|
||||
|
@ -2907,6 +2907,7 @@ class __lookahead
|
|||
typedef __owns_one_state<_CharT> base;
|
||||
|
||||
basic_regex<_CharT, _Traits> __exp_;
|
||||
unsigned __mexp_;
|
||||
bool __invert_;
|
||||
|
||||
__lookahead(const __lookahead&);
|
||||
|
@ -2915,8 +2916,8 @@ public:
|
|||
typedef _VSTD::__state<_CharT> __state;
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
__lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s)
|
||||
: base(__s), __exp_(__exp), __invert_(__invert) {}
|
||||
__lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
|
||||
: base(__s), __exp_(__exp), __invert_(__invert), __mexp_(__mexp) {}
|
||||
|
||||
virtual void __exec(__state&) const;
|
||||
};
|
||||
|
@ -2935,6 +2936,9 @@ __lookahead<_CharT, _Traits>::__exec(__state& __s) const
|
|||
{
|
||||
__s.__do_ = __state::__accept_but_not_consume;
|
||||
__s.__node_ = this->first();
|
||||
for (unsigned __i = 1; __i < __m.size(); ++__i) {
|
||||
__s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4168,7 +4172,9 @@ basic_regex<_CharT, _Traits>::__parse_assertion(_ForwardIterator __first,
|
|||
basic_regex __exp;
|
||||
__exp.__flags_ = __flags_;
|
||||
__temp = __exp.__parse(++__temp, __last);
|
||||
__push_lookahead(_VSTD::move(__exp), false);
|
||||
unsigned __mexp = __exp.__marked_count_;
|
||||
__push_lookahead(_VSTD::move(__exp), false, __marked_count_);
|
||||
__marked_count_ += __mexp;
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
if (__temp == __last || *__temp != ')')
|
||||
throw regex_error(regex_constants::error_paren);
|
||||
|
@ -4181,7 +4187,9 @@ basic_regex<_CharT, _Traits>::__parse_assertion(_ForwardIterator __first,
|
|||
basic_regex __exp;
|
||||
__exp.__flags_ = __flags_;
|
||||
__temp = __exp.__parse(++__temp, __last);
|
||||
__push_lookahead(_VSTD::move(__exp), true);
|
||||
unsigned __mexp = __exp.__marked_count_;
|
||||
__push_lookahead(_VSTD::move(__exp), true, __marked_count_);
|
||||
__marked_count_ += __mexp;
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
if (__temp == __last || *__temp != ')')
|
||||
throw regex_error(regex_constants::error_paren);
|
||||
|
@ -4759,10 +4767,11 @@ basic_regex<_CharT, _Traits>::__start_matching_list(bool __negate)
|
|||
template <class _CharT, class _Traits>
|
||||
void
|
||||
basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp,
|
||||
bool __invert)
|
||||
bool __invert,
|
||||
unsigned __mexp)
|
||||
{
|
||||
__end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert,
|
||||
__end_->first());
|
||||
__end_->first(), __mexp);
|
||||
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||
// Source Licenses. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// <regex>
|
||||
|
||||
// template <class BidirectionalIterator, class Allocator, class charT, class traits>
|
||||
// bool
|
||||
// regex_match(BidirectionalIterator first, BidirectionalIterator last,
|
||||
// match_results<BidirectionalIterator, Allocator>& m,
|
||||
// const basic_regex<charT, traits>& e,
|
||||
// regex_constants::match_flag_type flags = regex_constants::match_default);
|
||||
|
||||
// std::regex in ECMAScript mode should not ignore capture groups inside lookahead assertions.
|
||||
// For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 = "a", \2 = "a"
|
||||
|
||||
#include <regex>
|
||||
#include <cassert>
|
||||
|
||||
#include "test_iterators.h"
|
||||
|
||||
int main()
{
    // Every scenario compiles a pattern that has capture groups inside a
    // lookahead assertion, checks how many marked sub-expressions the pattern
    // reports, and then checks each capture produced by a full match.

    // One group inside a positive lookahead at the start of the pattern.
    {
        const std::regex pattern("^(?=(.))a$");
        assert(pattern.mark_count() == 1);

        const std::string subject("a");
        std::smatch caps;
        assert(std::regex_match(subject, caps, pattern));
        assert(caps.size() == 2);
        assert(caps.str(0) == "a");
        assert(caps.str(1) == "a");
    }

    // A lookahead group sitting between two ordinary groups: the groups are
    // expected to be numbered in pattern order (1, 2, 3).
    {
        const std::regex pattern("^(a)(?=(.))(b)$");
        assert(pattern.mark_count() == 3);

        const std::string subject("ab");
        std::smatch caps;
        assert(std::regex_match(subject, caps, pattern));
        assert(caps.size() == 4);
        assert(caps.str(0) == "ab");
        assert(caps.str(1) == "a");
        assert(caps.str(2) == "b");
        assert(caps.str(3) == "b");
    }

    // A lookahead nested inside another lookahead, each with its own group.
    {
        const std::regex pattern("^(.)(?=(.)(?=.(.)))(...)$");
        assert(pattern.mark_count() == 4);

        const std::string subject("abcd");
        std::smatch caps;
        assert(std::regex_match(subject, caps, pattern));
        assert(caps.size() == 5);
        assert(caps.str(0) == "abcd");
        assert(caps.str(1) == "a");
        assert(caps.str(2) == "b");
        assert(caps.str(3) == "d");
        assert(caps.str(4) == "bcd");
    }

    // A group inside a (?!...) assertion: it still counts as a marked
    // sub-expression, and is expected to compare equal to the empty string.
    {
        const std::regex pattern("^(a)(?!([^b]))(.c)$");
        assert(pattern.mark_count() == 3);

        const std::string subject("abc");
        std::smatch caps;
        assert(std::regex_match(subject, caps, pattern));
        assert(caps.size() == 4);
        assert(caps.str(0) == "abc");
        assert(caps.str(1) == "a");
        assert(caps.str(2) == "");
        assert(caps.str(3) == "bc");
    }

    // Several consecutive assertions, mixing (?!...) and (?=...) forms; only
    // the (?=...) group is expected to hold text.
    {
        const std::regex pattern("^(?!((b)))(?=(.))(?!(abc)).b$");
        assert(pattern.mark_count() == 4);

        const std::string subject("ab");
        std::smatch caps;
        assert(std::regex_match(subject, caps, pattern));
        assert(caps.size() == 5);
        assert(caps.str(0) == "ab");
        assert(caps.str(1) == "");
        assert(caps.str(2) == "");
        assert(caps.str(3) == "a");
        assert(caps.str(4) == "");
    }
}
|
Loading…
Reference in New Issue