From 6ad07dd9e91c69f748e8aa41f6c48b0a3d3f03c0 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Sun, 19 Dec 2010 03:41:24 +0000 Subject: [PATCH] Improved our argument parsing abilities to be able to handle stuff more like a shell would interpret it. A few examples that we now handle correctly INPUT: "Hello "World OUTPUT: "Hello World" INPUT: "Hello "' World' OUTPUT: "Hello World" INPUT: Hello" World" OUTPUT: "Hello World" This broke the setting of dictionary values for the "settings set" command for things like: (lldb) settings set target.process.env-vars ["MY_ENV_VAR"]=YES since we would drop the quotes. I fixed the user settings controller to use a regular expression so it can accept any of the following inputs for dictionary setting: settings set target.process.env-vars ["MY_ENV_VAR"]=YES settings set target.process.env-vars [MY_ENV_VAR]=YES settings set target.process.env-vars MY_ENV_VAR=YES We might want to eventually drop the first two syntaxes, but I won't make that decision right now. This allows more natural setting of the environment variables: settings set target.process.env-vars MY_ENV_VAR=YES ABC=DEF CWD=/tmp llvm-svn: 122166 --- lldb/include/lldb/Core/RegularExpression.h | 4 - lldb/source/Core/RegularExpression.cpp | 17 +- lldb/source/Core/UserSettingsController.cpp | 45 ++- lldb/source/Interpreter/Args.cpp | 303 ++++++++++++-------- 4 files changed, 221 insertions(+), 148 deletions(-) diff --git a/lldb/include/lldb/Core/RegularExpression.h b/lldb/include/lldb/Core/RegularExpression.h index 5a38e5fc8392..f71b24a5d627 100644 --- a/lldb/include/lldb/Core/RegularExpression.h +++ b/lldb/include/lldb/Core/RegularExpression.h @@ -101,10 +101,6 @@ public: /// @param[in] match_count /// The number of regmatch_t objects in \a match_ptr /// - /// @param[out] match_ptr - /// A pointer to at least \a match_count regmatch_t objects - /// if \a match_count is non-zero. - /// /// @param[in] execute_flags /// Flags to pass to the \c regexec() function. 
/// diff --git a/lldb/source/Core/RegularExpression.cpp b/lldb/source/Core/RegularExpression.cpp index 45ed9cc63742..7848931e3927 100644 --- a/lldb/source/Core/RegularExpression.cpp +++ b/lldb/source/Core/RegularExpression.cpp @@ -100,7 +100,7 @@ RegularExpression::Execute(const char* s, size_t num_matches, int execute_flags) match_result = ::regexec (&m_preg, s, m_matches.size(), - &m_matches.front(), + &m_matches[0], execute_flags); } return match_result == 0; @@ -111,9 +111,18 @@ RegularExpression::GetMatchAtIndex (const char* s, uint32_t idx, std::string& ma { if (idx <= m_preg.re_nsub && idx < m_matches.size()) { - match_str.assign (s + m_matches[idx].rm_so, - m_matches[idx].rm_eo - m_matches[idx].rm_so); - return true; + if (m_matches[idx].rm_eo == m_matches[idx].rm_so) + { + // Matched the empty string... + match_str.clear(); + return true; + } + else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) + { + match_str.assign (s + m_matches[idx].rm_so, + m_matches[idx].rm_eo - m_matches[idx].rm_so); + return true; + } } return false; } diff --git a/lldb/source/Core/UserSettingsController.cpp b/lldb/source/Core/UserSettingsController.cpp index fc8d1aab57f8..6cd1f3ccfd82 100644 --- a/lldb/source/Core/UserSettingsController.cpp +++ b/lldb/source/Core/UserSettingsController.cpp @@ -12,6 +12,7 @@ #include "lldb/Core/UserSettingsController.h" #include "lldb/Core/Error.h" +#include "lldb/Core/RegularExpression.h" #include "lldb/Core/Stream.h" #include "lldb/Core/StreamString.h" #include "lldb/Interpreter/CommandInterpreter.h" @@ -2102,34 +2103,32 @@ UserSettingsController::UpdateDictionaryVariable (lldb::VarSetOperationType op, } Args args (new_value); size_t num_args = args.GetArgumentCount(); + RegularExpression regex("(\\[\"?)?" // Regex match 1 (optional key prefix of '["' or '[') + "([A-Za-z_][A-Za-z_0-9]*)" // Regex match 2 (key string) + "(\"?\\])?" 
// Regex match 3 (optional key suffix of '"]' or ']') + "=" // The equal sign that is required + "(.*)"); // Regex match 4 (value string) + std::string key, value; + for (size_t i = 0; i < num_args; ++i) { - std::string tmp_arg = args.GetArgumentAtIndex (i); - size_t eq_sign = tmp_arg.find ('='); - if (eq_sign != std::string::npos) + const char *key_equal_value_arg = args.GetArgumentAtIndex (i); + // Execute the regular expression on each arg. + if (regex.Execute(key_equal_value_arg, 5)) { - if (eq_sign > 4) - { - std::string tmp_key = tmp_arg.substr (0, eq_sign); - std::string real_value = tmp_arg.substr (eq_sign+1); - if ((tmp_key[0] == '[') - && (tmp_key[1] == '"') - && (tmp_key[eq_sign-2] == '"') - && (tmp_key[eq_sign-1] == ']')) - { - std::string real_key = tmp_key.substr (2, eq_sign-4); - dictionary[real_key] = real_value; - } - else - err.SetErrorString ("Invalid key format for dictionary assignment. " - "Expected '[\"\"]'\n"); - } - else - err.SetErrorString ("Invalid key format for dictionary assignment. " - "Expected '[\"\"]'\n"); + // The regular expression succeeded. The match at index + // zero will be the entire string that matched the entire + // regular expression. The match at index 1 - 4 will be + // as mentioned above by the creation of the regex pattern. + // Match index 2 is the key, match index 4 is the value. + regex.GetMatchAtIndex (key_equal_value_arg, 2, key); + regex.GetMatchAtIndex (key_equal_value_arg, 4, value); + dictionary[key] = value; } else - err.SetErrorString ("Invalid format for dictionary value. Expected '[\"\"]='\n"); + { + err.SetErrorString ("Invalid format for dictionary value. 
Expected one of '[\"\"]=', '[]=', or '='\n"); + } } } break; diff --git a/lldb/source/Interpreter/Args.cpp b/lldb/source/Interpreter/Args.cpp index e5264e593bd6..affa644cd4b7 100644 --- a/lldb/source/Interpreter/Args.cpp +++ b/lldb/source/Interpreter/Args.cpp @@ -23,10 +23,6 @@ using namespace lldb; using namespace lldb_private; -static const char *k_space_characters = "\t\n\v\f\r "; -static const char *k_space_characters_with_slash = "\t\n\v\f\r \\"; - - //---------------------------------------------------------------------- // Args constructor //---------------------------------------------------------------------- @@ -34,7 +30,8 @@ Args::Args (const char *command) : m_args(), m_argv() { - SetCommandString (command); + if (command) + SetCommandString (command); } @@ -42,11 +39,10 @@ Args::Args (const char *command, size_t len) : m_args(), m_argv() { - SetCommandString (command, len); + if (command && len) + SetCommandString (command, len); } - - //---------------------------------------------------------------------- // Destructor //---------------------------------------------------------------------- @@ -97,20 +93,20 @@ bool Args::GetQuotedCommandString (std::string &command) { command.clear (); - int argc = GetArgumentCount (); - for (int i = 0; i < argc; ++i) + size_t argc = GetArgumentCount (); + for (size_t i = 0; i < argc; ++i) { if (i > 0) - command += ' '; - char quote_char = m_args_quote_char[i]; - if (quote_char != '\0') + command.append (1, ' '); + char quote_char = GetArgumentQuoteCharAtIndex(i); + if (quote_char) { - command += quote_char; - command += m_argv[i]; - command += quote_char; + command.append (1, quote_char); + command.append (m_argv[i]); + command.append (1, quote_char); } else - command += m_argv[i]; + command.append (m_argv[i]); } return argc > 0; } @@ -127,136 +123,197 @@ Args::SetCommandString (const char *command, size_t len) void Args::SetCommandString (const char *command) { + StreamFile s(stdout); + s.Printf("\nCOMMAND: %s\n", 
command); m_args.clear(); m_argv.clear(); + m_args_quote_char.clear(); + if (command && command[0]) { - const char *arg_start; - const char *next_arg_start; - for (arg_start = command, next_arg_start = NULL; - arg_start && arg_start[0]; - arg_start = next_arg_start, next_arg_start = NULL) + static const char *k_space_separators = " \t"; + static const char *k_space_separators_with_slash_and_quotes = " \t \\'\"`"; + const char *arg_end = NULL; + const char *arg_pos; + for (arg_pos = command; + arg_pos && arg_pos[0]; + arg_pos = arg_end) { - // Skip any leading space characters - arg_start = ::strspn (arg_start, k_space_characters) + arg_start; - - // If there were only space characters to the end of the line, then + // Skip any leading space separators + const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos; + + // If there were only space separators to the end of the line, then // we're done. if (*arg_start == '\0') break; + // Arguments can be split into multiple discontiguous pieces, + // for example: + // "Hello ""World" + // this would result in a single argument "Hello World" (without + // the quotes) since the quotes would be removed and there is + // no space between the strings. So we need to keep track of the + // current start of each argument piece in "arg_piece_start" + const char *arg_piece_start = arg_start; + arg_pos = arg_piece_start; + std::string arg; - const char *arg_end = NULL; + // Since we can have multiple quotes that form a single command + // in a command like: "Hello "world'!' (which will make a single + // argument "Hello world!") we remember the first quote character + // we encounter and use that for the quote character. 
+ char first_quote_char = '\0'; + char quote_char = '\0'; + bool arg_complete = false; - switch (*arg_start) + do { - case '\'': - case '"': - case '`': + arg_end = ::strcspn (arg_pos, k_space_separators_with_slash_and_quotes) + arg_pos; + + switch (arg_end[0]) { - // Look for either a quote character, or the backslash - // character - const char quote_char = *arg_start; - char find_chars[3] = { quote_char, '\\' , '\0'}; - bool is_backtick = (quote_char == '`'); - if (quote_char == '"' || quote_char == '`') - m_args_quote_char.push_back(quote_char); - else - m_args_quote_char.push_back('\0'); + default: + assert (!"Unhandled case statement, we must handle this..."); + break; - while (*arg_start != '\0') + case '\0': + // End of C string + if (arg_piece_start && arg_piece_start[0]) + arg.append (arg_piece_start); + arg_complete = true; + break; + + case '\\': + // Backslash character + switch (arg_end[1]) { - arg_end = ::strcspn (arg_start + 1, find_chars) + arg_start + 1; - - if (*arg_end == '\0') - { - arg.append (arg_start); + case '\0': + arg.append (arg_piece_start); + arg_complete = true; break; - } - // Watch out for quote characters prefixed with '\' - if (*arg_end == '\\') + default: + arg_pos = arg_end + 2; + break; + } + break; + + case '"': + case '\'': + case '`': + // Quote characters + if (quote_char) + { + // We found a quote character while inside a quoted + // character argument. If it matches our current quote + // character, this ends the effect of the quotes. If it + // doesn't we ignore it. 
+ if (quote_char == arg_end[0]) { - if (arg_end[1] == quote_char) + arg.append (arg_piece_start, arg_end - arg_piece_start); + // Clear the quote character and let parsing + // continue (we need to watch for things like: + // "Hello ""World" + // "Hello "World + // "Hello "'World' + // All of which will result in a single argument "Hello World" + quote_char = '\0'; // Note that we are no longer inside quotes + arg_pos = arg_end + 1; // Skip the quote character + arg_piece_start = arg_pos; // Note we are starting from later in the string + } + else + { + // different quote, skip it and keep going + arg_pos = arg_end + 1; + } + } + else + { + // We found the start of a quote scope. + // Make sure there isn't a string that precedes + // the start of a quote scope like: + // Hello" World" + // If so, then add the "Hello" to the arg + if (arg_end > arg_piece_start) + arg.append (arg_piece_start, arg_end - arg_piece_start); + + // Enter into a quote scope + quote_char = arg_end[0]; + + if (first_quote_char == '\0') + first_quote_char = quote_char; + + arg_pos = arg_end; + + if (quote_char != '`') + ++arg_pos; // Skip the quote character if it is not a backtick + + arg_piece_start = arg_pos; // Note we are starting from later in the string + + // Skip till the next quote character + const char *end_quote = ::strchr (arg_piece_start, quote_char); + while (end_quote && end_quote[-1] == '\\') + { + // Don't skip the quote character if it is + // preceded by a '\' character + end_quote = ::strchr (end_quote + 1, quote_char); + } + + if (end_quote) + { + if (end_quote > arg_piece_start) { - // The character following the '\' is our quote - // character so strip the backslash character - arg.append (arg_start, arg_end); + // Keep the backtick quote on commands + if (quote_char == '`') + arg.append (arg_piece_start, end_quote + 1 - arg_piece_start); + else + arg.append (arg_piece_start, end_quote - arg_piece_start); + } + + // If the next character is a space or the end of + // 
string, this argument is complete... + if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0') + { + arg_complete = true; + arg_end = end_quote + 1; } else { - // The character following the '\' is NOT our - // quote character, so include the backslash - // and continue - arg.append (arg_start, arg_end + 1); + arg_pos = end_quote + 1; + arg_piece_start = arg_pos; } - arg_start = arg_end + 1; - continue; - } - else - { - arg.append (arg_start, arg_end + 1); - next_arg_start = arg_end + 1; - break; + quote_char = '\0'; } } + break; - // Skip single and double quotes, but leave backtick quotes - if (!is_backtick) + case ' ': + case '\t': + if (quote_char) { - char first_c = arg[0]; - arg.erase(0,1); - // Only erase the last character if it is the same as the first. - // Otherwise, we're parsing an incomplete command line, and we - // would be stripping off the last character of that string. - if (arg[arg.size() - 1] == first_c) - arg.erase(arg.size() - 1, 1); + // We are currently processing a quoted character and found + // a space character, skip any spaces and keep trying to find + // the end of the argument. 
+ arg_pos = ::strspn (arg_end, k_space_separators) + arg_end; } - } - break; - default: - { - m_args_quote_char.push_back('\0'); - // Look for the next non-escaped space character - while (*arg_start != '\0') + else { - arg_end = ::strcspn (arg_start, k_space_characters_with_slash) + arg_start; - - if (arg_end == NULL) - { - arg.append(arg_start); - break; - } - - if (*arg_end == '\\') - { - // Append up to the '\' char - arg.append (arg_start, arg_end); - - if (arg_end[1] == '\0') - break; - - // Append the character following the '\' if it isn't - // the end of the string - arg.append (1, arg_end[1]); - arg_start = arg_end + 2; - continue; - } - else - { - arg.append (arg_start, arg_end); - next_arg_start = arg_end; - break; - } + // We are not inside any quotes, we just found a space after an + // argument + if (arg_end > arg_piece_start) + arg.append (arg_piece_start, arg_end - arg_piece_start); + arg_complete = true; } + break; } - break; - } + } while (!arg_complete); m_args.push_back(arg); + m_args_quote_char.push_back (first_quote_char); } + UpdateArgvFromArgs(); } - UpdateArgvFromArgs(); + Dump (&s); } void @@ -309,6 +366,9 @@ Args::UpdateArgvFromArgs() for (pos = m_args.begin(); pos != end; ++pos) m_argv.push_back(pos->c_str()); m_argv.push_back(NULL); + // Make sure we have enough arg quote chars in the array + if (m_args_quote_char.size() < m_args.size()) + m_args_quote_char.resize (m_argv.size()); } size_t @@ -359,7 +419,8 @@ Args::Shift () { m_argv.erase(m_argv.begin()); m_args.pop_front(); - m_args_quote_char.erase(m_args_quote_char.begin()); + if (!m_args_quote_char.empty()) + m_args_quote_char.erase(m_args_quote_char.begin()); } } @@ -399,8 +460,13 @@ Args::InsertArgumentAtIndex (size_t idx, const char *arg_cstr, char quote_char) pos = m_args.insert(pos, arg_cstr); - - m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char); + if (idx >= m_args_quote_char.size()) + { + m_args_quote_char.resize(idx + 1); + m_args_quote_char[idx] = 
quote_char; + } + else + m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char); UpdateArgvFromArgs(); return GetArgumentAtIndex(idx); @@ -422,6 +488,8 @@ Args::ReplaceArgumentAtIndex (size_t idx, const char *arg_cstr, char quote_char) pos->assign(arg_cstr); assert(idx < m_argv.size() - 1); m_argv[idx] = pos->c_str(); + if (idx >= m_args_quote_char.size()) + m_args_quote_char.resize(idx + 1); m_args_quote_char[idx] = quote_char; return GetArgumentAtIndex(idx); } @@ -444,7 +512,8 @@ Args::DeleteArgumentAtIndex (size_t idx) m_args.erase (pos); assert(idx < m_argv.size() - 1); m_argv.erase(m_argv.begin() + idx); - m_args_quote_char.erase(m_args_quote_char.begin() + idx); + if (idx < m_args_quote_char.size()) + m_args_quote_char.erase(m_args_quote_char.begin() + idx); } } @@ -462,7 +531,7 @@ Args::SetArguments (int argc, const char **argv) for (i=0; i