[Support/Path] Make handling of paths like "///" consistent

Summary:
Various path functions were not treating paths consisting of slashes
alone consistently. For example, the iterator-based accessors decomposed the
path "///" into two elements: "/" and ".". This is not too bad, but it
is different from the behavior specified by posix:
```
A pathname that contains ***at least one non-slash character*** and that
ends with one or more trailing slashes shall be resolved as if a single
dot character ( '.' ) were appended to the pathname.
```
More importantly, this was different from how we treated the same path
in the filename+parent_path functions, which decomposed this path into
"." and "". This was completely wrong as it lost the information that
this was an absolute path which referred to the root directory.

This patch fixes this behavior by making sure all functions treat paths
consisting of (back)slashes alone the same way as "/". I.e., the
iterator-based functions will just report one component ("/"), and the
filename+parent_path will decompose them into "/" and "".

A slightly controversial topic here may be the treatment of "//". Posix
says that paths beginning with "//" may have special meaning and indeed
we have code which parses paths like "//net/foo/bar" specially. However,
as we were already not being consistent in parsing the "//" string
alone, and any special parsing for it would complicate the code further,
I chose to treat it the same way as longer sequences of slashes (which
are guaranteed to be the same as "/").

Another slight change of behavior is in the parsing of paths like
"//net//". Previously the last component of this path was ".". However,
as in our parsing the "//net" part in this path was the same as the
"drive" part in "c:\" and the next slash was the "root directory", it
made sense to treat "//net//" the same way as "//net/" (i.e., not to add
the extra "." component at the end).

Reviewers: zturner, rnk, dblaikie, Bigcheese

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D45942

llvm-svn: 331876
This commit is contained in:
Pavel Labath 2018-05-09 13:21:16 +00:00
parent d88caa21e6
commit d20289b31a
2 changed files with 53 additions and 28 deletions

View File

@ -91,10 +91,9 @@ namespace {
return path.substr(0, end);
}
// Returns the first character of the filename in str. For paths ending in
// '/', it returns the position of the '/'.
size_t filename_pos(StringRef str, Style style) {
if (str.size() == 2 && is_separator(str[0], style) && str[0] == str[1])
return 0;
if (str.size() > 0 && is_separator(str[str.size() - 1], style))
return str.size() - 1;
@ -111,6 +110,8 @@ namespace {
return pos + 1;
}
// Returns the position of the root directory in str. If there is no root
// directory in str, it returns StringRef::npos.
size_t root_dir_start(StringRef str, Style style) {
// case "c:/"
if (real_style(style) == Style::windows) {
@ -118,10 +119,6 @@ namespace {
return 2;
}
// case "//"
if (str.size() == 2 && is_separator(str[0], style) && str[0] == str[1])
return StringRef::npos;
// case "//net"
if (str.size() > 3 && is_separator(str[0], style) && str[0] == str[1] &&
!is_separator(str[2], style)) {
@ -135,22 +132,29 @@ namespace {
return StringRef::npos;
}
// Returns the position past the end of the "parent path" of path. The parent
// path will not end in '/', unless the parent is the root directory. If the
// path has no parent, 0 is returned.
size_t parent_path_end(StringRef path, Style style) {
size_t end_pos = filename_pos(path, style);
bool filename_was_sep =
path.size() > 0 && is_separator(path[end_pos], style);
// Skip separators except for root dir.
size_t root_dir_pos = root_dir_start(path.substr(0, end_pos), style);
while (end_pos > 0 && (end_pos - 1) != root_dir_pos &&
// Skip separators until we reach root dir (or the start of the string).
size_t root_dir_pos = root_dir_start(path, style);
while (end_pos > 0 &&
(root_dir_pos == StringRef::npos || end_pos > root_dir_pos) &&
is_separator(path[end_pos - 1], style))
--end_pos;
if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
return StringRef::npos;
if (end_pos == root_dir_pos && !filename_was_sep) {
// We've reached the root dir and the input path was *not* ending in a
// sequence of slashes. Include the root dir in the parent path.
return root_dir_pos + 1;
}
// Otherwise, just include before the last slash.
return end_pos;
}
} // end unnamed namespace
@ -282,8 +286,8 @@ const_iterator &const_iterator::operator++() {
++Position;
}
// Treat trailing '/' as a '.'.
if (Position == Path.size()) {
// Treat trailing '/' as a '.', unless it is the root dir.
if (Position == Path.size() && Component != "/") {
--Position;
Component = ".";
return *this;
@ -322,23 +326,23 @@ reverse_iterator rend(StringRef Path) {
}
reverse_iterator &reverse_iterator::operator++() {
// If we're at the end and the previous char was a '/', return '.' unless
// we are the root path.
size_t root_dir_pos = root_dir_start(Path, S);
if (Position == Path.size() && Path.size() > root_dir_pos + 1 &&
is_separator(Path[Position - 1], S)) {
// Skip separators unless it's the root directory.
size_t end_pos = Position;
while (end_pos > 0 && (end_pos - 1) != root_dir_pos &&
is_separator(Path[end_pos - 1], S))
--end_pos;
// Treat trailing '/' as a '.', unless it is the root dir.
if (Position == Path.size() && !Path.empty() &&
is_separator(Path.back(), S) &&
(root_dir_pos == StringRef::npos || end_pos - 1 > root_dir_pos)) {
--Position;
Component = ".";
return *this;
}
// Skip separators unless it's the root directory.
size_t end_pos = Position;
while (end_pos > 0 && (end_pos - 1) != root_dir_pos &&
is_separator(Path[end_pos - 1], S))
--end_pos;
// Find next separator.
size_t start_pos = filename_pos(Path.substr(0, end_pos), S);
Component = Path.slice(start_pos, end_pos);

View File

@ -184,6 +184,12 @@ TEST(Support, FilenameParent) {
EXPECT_EQ("\\", path::filename("c:\\", path::Style::windows));
EXPECT_EQ("c:", path::parent_path("c:\\", path::Style::windows));
EXPECT_EQ("/", path::filename("///"));
EXPECT_EQ("", path::parent_path("///"));
EXPECT_EQ("\\", path::filename("c:\\\\", path::Style::windows));
EXPECT_EQ("c:", path::parent_path("c:\\\\", path::Style::windows));
EXPECT_EQ("bar", path::filename("/foo/bar"));
EXPECT_EQ("/foo", path::parent_path("/foo/bar"));
@ -204,6 +210,19 @@ TEST(Support, FilenameParent) {
EXPECT_EQ("foo", path::filename("//net/foo"));
EXPECT_EQ("//net/", path::parent_path("//net/foo"));
// These checks are just to make sure we do something reasonable with the
// paths below. They are not meant to prescribe the one true interpretation of
// these paths. Other decompositions (e.g. "//" -> "" + "//") are also
// possible.
EXPECT_EQ("/", path::filename("//"));
EXPECT_EQ("", path::parent_path("//"));
EXPECT_EQ("\\", path::filename("\\\\", path::Style::windows));
EXPECT_EQ("", path::parent_path("\\\\", path::Style::windows));
EXPECT_EQ("\\", path::filename("\\\\\\", path::Style::windows));
EXPECT_EQ("", path::parent_path("\\\\\\", path::Style::windows));
}
static std::vector<StringRef>
@ -214,6 +233,8 @@ GetComponents(StringRef Path, path::Style S = path::Style::native) {
TEST(Support, PathIterator) {
EXPECT_THAT(GetComponents("/foo"), testing::ElementsAre("/", "foo"));
EXPECT_THAT(GetComponents("/"), testing::ElementsAre("/"));
EXPECT_THAT(GetComponents("//"), testing::ElementsAre("/"));
EXPECT_THAT(GetComponents("///"), testing::ElementsAre("/"));
EXPECT_THAT(GetComponents("c/d/e/foo.txt"),
testing::ElementsAre("c", "d", "e", "foo.txt"));
EXPECT_THAT(GetComponents(".c/.d/../."),
@ -234,10 +255,8 @@ TEST(Support, AbsolutePathIteratorEnd) {
SmallVector<std::pair<StringRef, path::Style>, 4> Paths;
Paths.emplace_back("/foo/", path::Style::native);
Paths.emplace_back("/foo//", path::Style::native);
Paths.emplace_back("//net//", path::Style::native);
Paths.emplace_back("//net/foo/", path::Style::native);
Paths.emplace_back("c:\\foo\\", path::Style::windows);
Paths.emplace_back("c:\\\\", path::Style::windows);
for (auto &Path : Paths) {
SCOPED_TRACE(Path.first);
@ -249,6 +268,8 @@ TEST(Support, AbsolutePathIteratorEnd) {
RootPaths.emplace_back("/", path::Style::native);
RootPaths.emplace_back("//net/", path::Style::native);
RootPaths.emplace_back("c:\\", path::Style::windows);
RootPaths.emplace_back("//net//", path::Style::native);
RootPaths.emplace_back("c:\\\\", path::Style::windows);
for (auto &Path : RootPaths) {
SCOPED_TRACE(Path.first);