[libc] Add strtok_r implementation.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D85779
This commit is contained in:
parallels 2020-08-13 15:51:16 -04:00
parent 21810b0e14
commit bc45bab7eb
11 changed files with 220 additions and 23 deletions

View File

@ -35,6 +35,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.strspn
libc.src.string.strstr
libc.src.string.strtok
libc.src.string.strtok_r
)
set(TARGET_LIBM_ENTRYPOINTS

View File

@ -236,6 +236,7 @@ def StringAPI : PublicAPI<"string.h"> {
"strspn",
"strstr",
"strtok",
"strtok_r",
"strxfrm",
];

View File

@ -53,6 +53,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.strspn
libc.src.string.strstr
libc.src.string.strtok
libc.src.string.strtok_r
# sys/mman.h entrypoints
libc.src.sys.mman.mmap

View File

@ -11,8 +11,12 @@ def RestrictStructSigactionPtr : RestrictedPtrType<StructSigaction>;
def ConstRestrictStructSigactionPtr : ConstType<RestrictStructSigactionPtr>;
def POSIX : StandardSpec<"POSIX"> {
// TODO: Change naming so that they're consistent with other files.
PtrType CharPtr = PtrType<CharType>;
ConstType ConstCharPtr = ConstType<CharPtr>;
RestrictedPtrType RestrictedCharPtr = RestrictedPtrType<CharType>;
ConstType ConstRestrictedCharPtr = ConstType<RestrictedCharPtr>;
RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType<CharPtr>;
NamedType OffTType = NamedType<"off_t">;
NamedType SSizeTType = NamedType<"ssize_t">;
@ -221,6 +225,11 @@ def POSIX : StandardSpec<"POSIX"> {
RetValSpec<SizeTType>,
[ArgSpec<ConstCharPtr>, ArgSpec<SizeTType>]
>,
// POSIX prototype:
//   char *strtok_r(char *restrict str, const char *restrict delim,
//                  char **restrict saveptr);
// All three arguments must be listed: the string to tokenize, the delimiter
// set, and the caller-owned resume pointer.
FunctionSpec<
    "strtok_r",
    RetValSpec<CharPtr>,
    [ArgSpec<RestrictedCharPtr>,
     ArgSpec<ConstRestrictedCharPtr>,
     ArgSpec<CharRestrictedDoublePtr>]
>,
]
>;

View File

@ -139,7 +139,17 @@ add_entrypoint_object(
HDRS
strtok.h
DEPENDS
libc.utils.CPP.standalone_cpp
.string_utils
)
# Entrypoint object for strtok_r. strtok_r.cpp includes
# src/string/string_utils.h, hence the dependency on .string_utils.
add_entrypoint_object(
strtok_r
SRCS
strtok_r.cpp
HDRS
strtok_r.h
DEPENDS
.string_utils
)
# Helper to define a function with multiple implementations

View File

@ -28,6 +28,39 @@ static inline size_t complementary_span(const char *src, const char *segment) {
return src - initial;
}
// Shared tokenizer used to implement both strtok and strtok_r.
//
// Scanning starts at 'src' when it is non-null; otherwise it resumes from the
// position previously stored in '*saveptr'. Leading characters that appear in
// 'delimiter_string' are skipped, and the token is the run of non-delimiter
// characters that follows. If the token is ended by a delimiter, that
// delimiter is overwritten with a null terminator and '*saveptr' is set to the
// character after it. If the token runs to the end of the string, '*saveptr'
// is set to the null terminator so that subsequent calls return nullptr.
//
// Returns a pointer to the start of the token, or nullptr when no token
// remains (including when both 'src' and '*saveptr' are null).
static inline char *string_token(char *src, const char *delimiter_string,
                                 char **saveptr) {
  // A continuation call with no saved position has nothing to scan. Report
  // "no token" instead of dereferencing a null pointer.
  if (src == nullptr && (src = *saveptr) == nullptr)
    return nullptr;

  // Index the set by unsigned char so that bytes >= 0x80 are not passed as
  // negative values on targets where plain char is signed.
  cpp::Bitset<256> delimiter_set;
  for (; *delimiter_string; ++delimiter_string)
    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));

  // Skip any leading delimiters.
  for (; *src && delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
    ;
  if (!*src) {
    // Only delimiters (or nothing) were left; park on the terminator.
    *saveptr = src;
    return nullptr;
  }

  // Advance to the end of the token.
  char *token = src;
  for (; *src && !delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
    ;
  if (*src) {
    // Terminate the token in place and resume after the delimiter next time.
    *src = '\0';
    ++src;
  }
  *saveptr = src;
  return token;
}
} // namespace internal
} // namespace __llvm_libc

View File

@ -9,34 +9,16 @@
#include "src/string/strtok.h"
#include "src/__support/common.h"
#include "utils/CPP/Bitset.h"
#include "src/string/string_utils.h"
namespace __llvm_libc {
static char *strtok_str = nullptr;
// TODO: Place restrict qualifier where necessary for this and other function
// arguments.
// Returns the next token of 'src' delimited by any character found in
// 'delimiter_string'. Pass the string on the first call and nullptr on later
// calls to keep tokenizing the same string. The resume position lives in the
// file-level 'strtok_str', so it is shared by every caller of strtok.
// Returns nullptr once no token remains.
//
// The tokenizing logic is shared with strtok_r through
// internal::string_token rather than being duplicated here.
char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) {
  return internal::string_token(src, delimiter_string, &strtok_str);
}
} // namespace __llvm_libc

View File

@ -0,0 +1,21 @@
//===-- Implementation of strtok_r ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/strtok_r.h"
#include "src/__support/common.h"
#include "src/string/string_utils.h"
namespace __llvm_libc {
// Reentrant tokenizer entrypoint. Forwards to internal::string_token, with the
// resume position kept in the caller-provided '*saveptr' instead of in
// library-owned state. Returns the next token, or nullptr when none remains.
char *LLVM_LIBC_ENTRYPOINT(strtok_r)(char *src, const char *delimiter_string,
                                     char **saveptr) {
  char *const next_token =
      internal::string_token(src, delimiter_string, saveptr);
  return next_token;
}
} // namespace __llvm_libc

View File

@ -0,0 +1,18 @@
//===-- Implementation header for strtok_r ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_STRTOK_R_H
#define LLVM_LIBC_SRC_STRING_STRTOK_R_H
namespace __llvm_libc {
// Returns the next token delimited by characters in 'delimiter_string'.
// Scans 'src' when it is non-null, otherwise continues from the position
// stored in '*saveptr'. Updates '*saveptr' for the following call and returns
// nullptr when no token remains.
char *strtok_r(char *src, const char *delimiter_string, char **saveptr);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STRING_STRTOK_R_H

View File

@ -142,6 +142,16 @@ add_libc_unittest(
libc.src.string.strtok
)
# Unit tests for the strtok_r entrypoint (strtok_r_test.cpp), registered in the
# libc_string_unittests suite.
add_libc_unittest(
strtok_r_test
SUITE
libc_string_unittests
SRCS
strtok_r_test.cpp
DEPENDS
libc.src.string.strtok_r
)
# Tests all implementations that can run on the host.
function(add_libc_multi_impl_test name)
get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)

View File

@ -0,0 +1,111 @@
//===-- Unittests for strtok_r -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/strtok_r.h"
#include "utils/UnitTest/Test.h"
// Inputs in which no delimiter from the delimiter string splits the source.
// Each case makes a fresh call, repeats it on the same buffer, and then makes
// a continuation call (nullptr source) to check the saved state stays usable.
TEST(StrTokReentrantTest, NoTokenFound) {
  // Case 1: empty source, empty delimiter string.
  {
    char source[] = "";
    char *saveptr = nullptr;
    char *result = __llvm_libc::strtok_r(source, "", &saveptr);
    ASSERT_STREQ(result, nullptr);
    // Restarting on the same buffer must give the same answer.
    result = __llvm_libc::strtok_r(source, "", &saveptr);
    ASSERT_STREQ(result, nullptr);
    result = __llvm_libc::strtok_r(nullptr, "", &saveptr);
    ASSERT_STREQ(result, nullptr);
  }
  // Case 2: empty source, one-character delimiter string.
  {
    char source[] = "";
    char *saveptr = nullptr;
    char *result = __llvm_libc::strtok_r(source, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
    // Restarting on the same buffer must give the same answer.
    result = __llvm_libc::strtok_r(source, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
    result = __llvm_libc::strtok_r(nullptr, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
  }
  // Case 3: the source consists solely of the delimiter character.
  {
    char source[] = "_";
    char *saveptr = nullptr;
    char *result = __llvm_libc::strtok_r(source, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
    // Restarting on the same buffer must give the same answer.
    result = __llvm_libc::strtok_r(source, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
    result = __llvm_libc::strtok_r(nullptr, "_", &saveptr);
    ASSERT_STREQ(result, nullptr);
  }
  // Case 4: the delimiter never occurs, so the whole source is the one token.
  {
    char source[] = "1,2";
    char *saveptr = nullptr;
    char *result = __llvm_libc::strtok_r(source, ":", &saveptr);
    ASSERT_STREQ(result, "1,2");
    // Restarting on the same buffer must give the same answer.
    result = __llvm_libc::strtok_r(source, ":", &saveptr);
    ASSERT_STREQ(result, "1,2");
    result = __llvm_libc::strtok_r(nullptr, ":", &saveptr);
    ASSERT_STREQ(result, nullptr);
  }
}
// A delimiter at the very start of the source is skipped, not returned as an
// empty token.
TEST(StrTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
  char input[] = ".123";
  char *saveptr = nullptr;
  char *result = __llvm_libc::strtok_r(input, ".", &saveptr);
  ASSERT_STREQ(result, "123");
  // Restarting on the same buffer checks 'saveptr' was left in a usable state.
  result = __llvm_libc::strtok_r(input, ".", &saveptr);
  ASSERT_STREQ(result, "123");
  result = __llvm_libc::strtok_r(nullptr, ".", &saveptr);
  ASSERT_STREQ(result, nullptr);
}
// A delimiter in the middle of the source ends the first token. Restarting on
// the (now modified) buffer yields the same first token, and nothing follows.
TEST(StrTokReentrantTest, DelimiterIsMiddleCharacter) {
  char input[] = "12,34";
  char *saveptr = nullptr;
  char *result = __llvm_libc::strtok_r(input, ",", &saveptr);
  ASSERT_STREQ(result, "12");
  // Restarting on the same buffer checks 'saveptr' was left in a usable state.
  result = __llvm_libc::strtok_r(input, ",", &saveptr);
  ASSERT_STREQ(result, "12");
  result = __llvm_libc::strtok_r(nullptr, ",", &saveptr);
  ASSERT_STREQ(result, nullptr);
}
// A delimiter at the very end of the source does not produce a trailing empty
// token.
TEST(StrTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
  char input[] = "1234:";
  char *saveptr = nullptr;
  char *result = __llvm_libc::strtok_r(input, ":", &saveptr);
  ASSERT_STREQ(result, "1234");
  // Restarting on the same buffer checks 'saveptr' was left in a usable state.
  result = __llvm_libc::strtok_r(input, ":", &saveptr);
  ASSERT_STREQ(result, "1234");
  result = __llvm_libc::strtok_r(nullptr, ":", &saveptr);
  ASSERT_STREQ(result, nullptr);
}
// Characters stored after an embedded null terminator must never be scanned,
// so the ",3" that follows the terminator is not tokenized.
TEST(StrTokReentrantTest, ShouldNotGoPastNullTerminator) {
  char input[] = {'1', '2', '\0', ',', '3'};
  char *saveptr = nullptr;
  char *result = __llvm_libc::strtok_r(input, ",", &saveptr);
  ASSERT_STREQ(result, "12");
  // Restarting on the same buffer checks 'saveptr' was left in a usable state.
  result = __llvm_libc::strtok_r(input, ",", &saveptr);
  ASSERT_STREQ(result, "12");
  result = __llvm_libc::strtok_r(nullptr, ",", &saveptr);
  ASSERT_STREQ(result, nullptr);
}
// Continuation calls (nullptr source) walk through the remaining tokens, and
// keep returning nullptr once the end of the string has been reached.
TEST(StrTokReentrantTest, SubsequentCallsShouldFindFollowingDelimiters) {
  char input[] = "12,34.56";
  char *saveptr = nullptr;
  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",.", &saveptr), "12");
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &saveptr), "34");
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &saveptr), "56");
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
  // The string is exhausted: every further call must also report no token.
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
}
// Runs of delimiters around each token are stripped, even when the delimiter
// string changes from one call to the next.
TEST(StrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
  char input[] = "__ab__:_cd__:__ef__:__";
  char *saveptr = nullptr;
  ASSERT_STREQ(__llvm_libc::strtok_r(input, "_:", &saveptr), "ab");
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":_", &saveptr), "cd");
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,", &saveptr), "ef");
  // Only delimiters remain after "ef".
  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
}