Improved our argument parsing so that it handles quoting more like

a shell would. A few examples that we now handle correctly:

INPUT: "Hello "World
OUTPUT: "Hello World"

INPUT: "Hello "' World'
OUTPUT: "Hello World"

INPUT: Hello" World"
OUTPUT: "Hello World"

This broke the setting of dictionary values for the "settings set" command
for things like:

(lldb) settings set target.process.env-vars ["MY_ENV_VAR"]=YES

since we would drop the quotes. I fixed the user settings controller to use
a regular expression so it can accept any of the following inputs for
dictionary setting:

settings set target.process.env-vars ["MY_ENV_VAR"]=YES
settings set target.process.env-vars [MY_ENV_VAR]=YES
settings set target.process.env-vars MY_ENV_VAR=YES

We might want to eventually drop the first two syntaxes, but I won't make
that decision right now.

This allows more natural setting of the environment variables:

settings set target.process.env-vars MY_ENV_VAR=YES ABC=DEF CWD=/tmp

llvm-svn: 122166
This commit is contained in:
Greg Clayton 2010-12-19 03:41:24 +00:00
parent 9a2d4e04c7
commit 6ad07dd9e9
4 changed files with 221 additions and 148 deletions

View File

@ -101,10 +101,6 @@ public:
/// @param[in] match_count
/// The number of regmatch_t objects in \a match_ptr
///
/// @param[out] match_ptr
/// A pointer to at least \a match_count regmatch_t objects
/// if \a match_count is non-zero.
///
/// @param[in] execute_flags
/// Flags to pass to the \c regexec() function.
///

View File

@ -100,7 +100,7 @@ RegularExpression::Execute(const char* s, size_t num_matches, int execute_flags)
match_result = ::regexec (&m_preg,
s,
m_matches.size(),
&m_matches.front(),
&m_matches[0],
execute_flags);
}
return match_result == 0;
@ -111,9 +111,18 @@ RegularExpression::GetMatchAtIndex (const char* s, uint32_t idx, std::string& ma
{
if (idx <= m_preg.re_nsub && idx < m_matches.size())
{
match_str.assign (s + m_matches[idx].rm_so,
m_matches[idx].rm_eo - m_matches[idx].rm_so);
return true;
if (m_matches[idx].rm_eo == m_matches[idx].rm_so)
{
// Matched the empty string...
match_str.clear();
return true;
}
else if (m_matches[idx].rm_eo > m_matches[idx].rm_so)
{
match_str.assign (s + m_matches[idx].rm_so,
m_matches[idx].rm_eo - m_matches[idx].rm_so);
return true;
}
}
return false;
}

View File

@ -12,6 +12,7 @@
#include "lldb/Core/UserSettingsController.h"
#include "lldb/Core/Error.h"
#include "lldb/Core/RegularExpression.h"
#include "lldb/Core/Stream.h"
#include "lldb/Core/StreamString.h"
#include "lldb/Interpreter/CommandInterpreter.h"
@ -2102,34 +2103,32 @@ UserSettingsController::UpdateDictionaryVariable (lldb::VarSetOperationType op,
}
Args args (new_value);
size_t num_args = args.GetArgumentCount();
RegularExpression regex("(\\[\"?)?" // Regex match 1 (optional key prefix of '["' pr '[')
"([A-Za-z_][A-Za-z_0-9]*)" // Regex match 2 (key string)
"(\"?\\])?" // Regex match 3 (optional key suffix of '"]' pr ']')
"=" // The equal sign that is required
"(.*)"); // Regex match 4 (value string)
std::string key, value;
for (size_t i = 0; i < num_args; ++i)
{
std::string tmp_arg = args.GetArgumentAtIndex (i);
size_t eq_sign = tmp_arg.find ('=');
if (eq_sign != std::string::npos)
const char *key_equal_value_arg = args.GetArgumentAtIndex (i);
// Execute the regular expression on each arg.
if (regex.Execute(key_equal_value_arg, 5))
{
if (eq_sign > 4)
{
std::string tmp_key = tmp_arg.substr (0, eq_sign);
std::string real_value = tmp_arg.substr (eq_sign+1);
if ((tmp_key[0] == '[')
&& (tmp_key[1] == '"')
&& (tmp_key[eq_sign-2] == '"')
&& (tmp_key[eq_sign-1] == ']'))
{
std::string real_key = tmp_key.substr (2, eq_sign-4);
dictionary[real_key] = real_value;
}
else
err.SetErrorString ("Invalid key format for dictionary assignment. "
"Expected '[\"<key>\"]'\n");
}
else
err.SetErrorString ("Invalid key format for dictionary assignment. "
"Expected '[\"<key>\"]'\n");
// The regular expression succeeded. The match at index
// zero will be the entire string that matched the entire
// regular expression. The match at index 1 - 4 will be
// as mentioned above by the creation of the regex pattern.
// Match index 2 is the key, match index 4 is the value.
regex.GetMatchAtIndex (key_equal_value_arg, 2, key);
regex.GetMatchAtIndex (key_equal_value_arg, 4, value);
dictionary[key] = value;
}
else
err.SetErrorString ("Invalid format for dictionary value. Expected '[\"<key>\"]=<value>'\n");
{
err.SetErrorString ("Invalid format for dictionary value. Expected one of '[\"<key>\"]=<value>', '[<key>]=<value>', or '<key>=<value>'\n");
}
}
}
break;

View File

@ -23,10 +23,6 @@
using namespace lldb;
using namespace lldb_private;
static const char *k_space_characters = "\t\n\v\f\r ";
static const char *k_space_characters_with_slash = "\t\n\v\f\r \\";
//----------------------------------------------------------------------
// Args constructor
//----------------------------------------------------------------------
@ -34,7 +30,8 @@ Args::Args (const char *command) :
m_args(),
m_argv()
{
SetCommandString (command);
if (command)
SetCommandString (command);
}
@ -42,11 +39,10 @@ Args::Args (const char *command, size_t len) :
m_args(),
m_argv()
{
SetCommandString (command, len);
if (command && len)
SetCommandString (command, len);
}
//----------------------------------------------------------------------
// Destructor
//----------------------------------------------------------------------
@ -97,20 +93,20 @@ bool
Args::GetQuotedCommandString (std::string &command)
{
command.clear ();
int argc = GetArgumentCount ();
for (int i = 0; i < argc; ++i)
size_t argc = GetArgumentCount ();
for (size_t i = 0; i < argc; ++i)
{
if (i > 0)
command += ' ';
char quote_char = m_args_quote_char[i];
if (quote_char != '\0')
command.append (1, ' ');
char quote_char = GetArgumentQuoteCharAtIndex(i);
if (quote_char)
{
command += quote_char;
command += m_argv[i];
command += quote_char;
command.append (1, quote_char);
command.append (m_argv[i]);
command.append (1, quote_char);
}
else
command += m_argv[i];
command.append (m_argv[i]);
}
return argc > 0;
}
@ -127,136 +123,197 @@ Args::SetCommandString (const char *command, size_t len)
void
Args::SetCommandString (const char *command)
{
StreamFile s(stdout);
s.Printf("\nCOMMAND: %s\n", command);
m_args.clear();
m_argv.clear();
m_args_quote_char.clear();
if (command && command[0])
{
const char *arg_start;
const char *next_arg_start;
for (arg_start = command, next_arg_start = NULL;
arg_start && arg_start[0];
arg_start = next_arg_start, next_arg_start = NULL)
static const char *k_space_separators = " \t";
static const char *k_space_separators_with_slash_and_quotes = " \t \\'\"`";
const char *arg_end = NULL;
const char *arg_pos;
for (arg_pos = command;
arg_pos && arg_pos[0];
arg_pos = arg_end)
{
// Skip any leading space characters
arg_start = ::strspn (arg_start, k_space_characters) + arg_start;
// If there were only space characters to the end of the line, then
// Skip any leading space separators
const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
// If there were only space separators to the end of the line, then
// we're done.
if (*arg_start == '\0')
break;
// Arguments can be split into multiple discontiguous pieces,
// for example:
// "Hello ""World"
// this would result in a single argument "Hello World" (without
// the quotes) since the quotes would be removed and there is
// no space between the strings. So we need to keep track of the
// current start of each argument piece in "arg_piece_start"
const char *arg_piece_start = arg_start;
arg_pos = arg_piece_start;
std::string arg;
const char *arg_end = NULL;
// Since we can have multiple quotes that form a single command
// in a command like: "Hello "world'!' (which will make a single
// argument "Hello world!") we remember the first quote character
// we encounter and use that for the quote character.
char first_quote_char = '\0';
char quote_char = '\0';
bool arg_complete = false;
switch (*arg_start)
do
{
case '\'':
case '"':
case '`':
arg_end = ::strcspn (arg_pos, k_space_separators_with_slash_and_quotes) + arg_pos;
switch (arg_end[0])
{
// Look for either a quote character, or the backslash
// character
const char quote_char = *arg_start;
char find_chars[3] = { quote_char, '\\' , '\0'};
bool is_backtick = (quote_char == '`');
if (quote_char == '"' || quote_char == '`')
m_args_quote_char.push_back(quote_char);
else
m_args_quote_char.push_back('\0');
default:
assert (!"Unhandled case statement, we must handle this...");
break;
while (*arg_start != '\0')
case '\0':
// End of C string
if (arg_piece_start && arg_piece_start[0])
arg.append (arg_piece_start);
arg_complete = true;
break;
case '\\':
// Backslash character
switch (arg_end[1])
{
arg_end = ::strcspn (arg_start + 1, find_chars) + arg_start + 1;
if (*arg_end == '\0')
{
arg.append (arg_start);
case '\0':
arg.append (arg_piece_start);
arg_complete = true;
break;
}
// Watch out for quote characters prefixed with '\'
if (*arg_end == '\\')
default:
arg_pos = arg_end + 2;
break;
}
break;
case '"':
case '\'':
case '`':
// Quote characters
if (quote_char)
{
// We found a quote character while inside a quoted
// character argument. If it matches our current quote
// character, this ends the effect of the quotes. If it
// doesn't we ignore it.
if (quote_char == arg_end[0])
{
if (arg_end[1] == quote_char)
arg.append (arg_piece_start, arg_end - arg_piece_start);
// Clear the quote character and let parsing
// continue (we need to watch for things like:
// "Hello ""World"
// "Hello "World
// "Hello "'World'
// All of which will result in a single argument "Hello World"
quote_char = '\0'; // Note that we are no longer inside quotes
arg_pos = arg_end + 1; // Skip the quote character
arg_piece_start = arg_pos; // Note we are starting from later in the string
}
else
{
// different quote, skip it and keep going
arg_pos = arg_end + 1;
}
}
else
{
// We found the start of a quote scope.
// Make sure there isn't a string that precedes
// the start of a quote scope like:
// Hello" World"
// If so, then add the "Hello" to the arg
if (arg_end > arg_piece_start)
arg.append (arg_piece_start, arg_end - arg_piece_start);
// Enter into a quote scope
quote_char = arg_end[0];
if (first_quote_char == '\0')
first_quote_char = quote_char;
arg_pos = arg_end;
if (quote_char != '`')
++arg_pos; // Skip the quote character if it is not a backtick
arg_piece_start = arg_pos; // Note we are starting from later in the string
// Skip till the next quote character
const char *end_quote = ::strchr (arg_piece_start, quote_char);
while (end_quote && end_quote[-1] == '\\')
{
// Don't skip the quote character if it is
// preceded by a '\' character
end_quote = ::strchr (end_quote + 1, quote_char);
}
if (end_quote)
{
if (end_quote > arg_piece_start)
{
// The character following the '\' is our quote
// character so strip the backslash character
arg.append (arg_start, arg_end);
// Keep the backtick quote on commands
if (quote_char == '`')
arg.append (arg_piece_start, end_quote + 1 - arg_piece_start);
else
arg.append (arg_piece_start, end_quote - arg_piece_start);
}
// If the next character is a space or the end of
// string, this argument is complete...
if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
{
arg_complete = true;
arg_end = end_quote + 1;
}
else
{
// The character following the '\' is NOT our
// quote character, so include the backslash
// and continue
arg.append (arg_start, arg_end + 1);
arg_pos = end_quote + 1;
arg_piece_start = arg_pos;
}
arg_start = arg_end + 1;
continue;
}
else
{
arg.append (arg_start, arg_end + 1);
next_arg_start = arg_end + 1;
break;
quote_char = '\0';
}
}
break;
// Skip single and double quotes, but leave backtick quotes
if (!is_backtick)
case ' ':
case '\t':
if (quote_char)
{
char first_c = arg[0];
arg.erase(0,1);
// Only erase the last character if it is the same as the first.
// Otherwise, we're parsing an incomplete command line, and we
// would be stripping off the last character of that string.
if (arg[arg.size() - 1] == first_c)
arg.erase(arg.size() - 1, 1);
// We are currently processing a quoted character and found
// a space character, skip any spaces and keep trying to find
// the end of the argument.
arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
}
}
break;
default:
{
m_args_quote_char.push_back('\0');
// Look for the next non-escaped space character
while (*arg_start != '\0')
else
{
arg_end = ::strcspn (arg_start, k_space_characters_with_slash) + arg_start;
if (arg_end == NULL)
{
arg.append(arg_start);
break;
}
if (*arg_end == '\\')
{
// Append up to the '\' char
arg.append (arg_start, arg_end);
if (arg_end[1] == '\0')
break;
// Append the character following the '\' if it isn't
// the end of the string
arg.append (1, arg_end[1]);
arg_start = arg_end + 2;
continue;
}
else
{
arg.append (arg_start, arg_end);
next_arg_start = arg_end;
break;
}
// We are not inside any quotes, we just found a space after an
// argument
if (arg_end > arg_piece_start)
arg.append (arg_piece_start, arg_end - arg_piece_start);
arg_complete = true;
}
break;
}
break;
}
} while (!arg_complete);
m_args.push_back(arg);
m_args_quote_char.push_back (first_quote_char);
}
UpdateArgvFromArgs();
}
UpdateArgvFromArgs();
Dump (&s);
}
void
@ -309,6 +366,9 @@ Args::UpdateArgvFromArgs()
for (pos = m_args.begin(); pos != end; ++pos)
m_argv.push_back(pos->c_str());
m_argv.push_back(NULL);
// Make sure we have enough arg quote chars in the array
if (m_args_quote_char.size() < m_args.size())
m_args_quote_char.resize (m_argv.size());
}
size_t
@ -359,7 +419,8 @@ Args::Shift ()
{
m_argv.erase(m_argv.begin());
m_args.pop_front();
m_args_quote_char.erase(m_args_quote_char.begin());
if (!m_args_quote_char.empty())
m_args_quote_char.erase(m_args_quote_char.begin());
}
}
@ -399,8 +460,13 @@ Args::InsertArgumentAtIndex (size_t idx, const char *arg_cstr, char quote_char)
pos = m_args.insert(pos, arg_cstr);
m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
if (idx >= m_args_quote_char.size())
{
m_args_quote_char.resize(idx + 1);
m_args_quote_char[idx] = quote_char;
}
else
m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
UpdateArgvFromArgs();
return GetArgumentAtIndex(idx);
@ -422,6 +488,8 @@ Args::ReplaceArgumentAtIndex (size_t idx, const char *arg_cstr, char quote_char)
pos->assign(arg_cstr);
assert(idx < m_argv.size() - 1);
m_argv[idx] = pos->c_str();
if (idx >= m_args_quote_char.size())
m_args_quote_char.resize(idx + 1);
m_args_quote_char[idx] = quote_char;
return GetArgumentAtIndex(idx);
}
@ -444,7 +512,8 @@ Args::DeleteArgumentAtIndex (size_t idx)
m_args.erase (pos);
assert(idx < m_argv.size() - 1);
m_argv.erase(m_argv.begin() + idx);
m_args_quote_char.erase(m_args_quote_char.begin() + idx);
if (idx < m_args_quote_char.size())
m_args_quote_char.erase(m_args_quote_char.begin() + idx);
}
}
@ -462,7 +531,7 @@ Args::SetArguments (int argc, const char **argv)
for (i=0; i<argc; ++i)
{
m_args.push_back (argv[i]);
if ((argv[i][0] == '"') || (argv[i][0] == '`'))
if ((argv[i][0] == '\'') || (argv[i][0] == '"') || (argv[i][0] == '`'))
m_args_quote_char.push_back (argv[i][0]);
else
m_args_quote_char.push_back ('\0');