diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..bf04ae2e83fb3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -386,6 +386,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemchr
     libc.src.wchar.wcpcpy
     libc.src.wchar.wcpncpy
+    libc.src.wchar.wcstok
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..15025f42c0723 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -189,6 +189,14 @@ functions:
     arguments:
       - type: wchar_t *__restrict
       - type: const wchar_t *__restrict
+  - name: wcstok
+    standards:
+      - stdc
+    return_type: wchar_t *
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const wchar_t *__restrict
+      - type: wchar_t **__restrict
   - name: wcpcpy
     standards:
       - stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..6d93b82b2d2bf 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,16 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+add_entrypoint_object(
+  wcstok
+  SRCS
+    wcstok.cpp
+  HDRS
+    wcstok.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+)
+
 add_entrypoint_object(
   wcrtomb
   SRCS
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
new file mode 100644
index 0000000000000..291efc15e158a
--- /dev/null
+++ b/libc/src/wchar/wcstok.cpp
@@ -0,0 +1,50 @@
+//===-- Implementation of wcstok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstok.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns true if `wc` equals any character of the null-terminated
+// `delimiters` string. The scan stops at the terminator without comparing it,
+// so L'\0' is never reported as a delimiter.
+// Declared static so the helper has internal linkage and does not add an
+// extra externally visible symbol to the entrypoint object.
+static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
+  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
+    if (wc == *delim_ptr)
+      return true;
+  return false;
+}
+
+// Splits a wide string into tokens separated by any character in `delim`.
+//
+// If `str` is non-null, scanning starts at `str`. If `str` is null, scanning
+// resumes from the position saved in `*context`; when `*context` is null as
+// well, nullptr is returned and `*context` is left untouched.
+//
+// The delimiter that ends the token is overwritten with L'\0' and `*context`
+// is set to the character after it, or to the string terminator when the end
+// of the string was reached. Returns the start of the token, or nullptr when
+// only delimiters (or nothing) remain.
+//
+// `context` is dereferenced unconditionally, so it must be a valid pointer.
+LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                    wchar_t **__restrict context)) {
+  if (str == nullptr) {
+    if (*context == nullptr)
+      return nullptr;
+
+    str = *context;
+  }
+
+  // Skip leading delimiters to find where the token begins.
+  wchar_t *tok_start = str;
+  while (*tok_start != L'\0' && isADelimeter(*tok_start, delim))
+    ++tok_start;
+
+  // Advance to the first delimiter (or the terminator) after the token.
+  wchar_t *tok_end = tok_start;
+  while (*tok_end != L'\0' && !isADelimeter(*tok_end, delim))
+    ++tok_end;
+
+  // Terminate the token in place and step past the delimiter so the next
+  // call resumes after it. At end of string, stay on the terminator instead.
+  if (*tok_end != L'\0') {
+    *tok_end = L'\0';
+    ++tok_end;
+  }
+  *context = tok_end;
+  // An empty span means no token was found.
+  return *tok_start == L'\0' ? nullptr : tok_start;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
new file mode 100644
index 0000000000000..5e673ff4e89b9
--- /dev/null
+++ b/libc/src/wchar/wcstok.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstok ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
+                wchar_t **__restrict context);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..8967cc1e8d353 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -111,6 +111,16 @@ add_libc_test(
     libc.src.wchar.wcschr
 )
 
+add_libc_test(
+  wcstok_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstok_test.cpp
+  DEPENDS
+    libc.src.wchar.wcstok
+)
+
 add_libc_test(
   wcsncmp_test
   SUITE
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
new file mode 100644
index 0000000000000..7106e9f2fab5e
--- /dev/null
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -0,0 +1,153 @@
+//===-- Unittests for wcstok ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcstok.h"
+#include "test/UnitTest/Test.h"
+
+// Returns true if `tok` is non-null and holds exactly the wide characters of
+// `expected`, terminator included. The null check turns a wcstok regression
+// into a failed assertion instead of a null dereference inside the test.
+static bool token_equals(const wchar_t *tok, const wchar_t *expected) {
+  if (tok == nullptr)
+    return false;
+  for (; *tok == *expected; ++tok, ++expected)
+    if (*tok == L'\0')
+      return true;
+  return false;
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
+  { // Empty source and delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+  }
+  { // Empty source and single character delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Same character source and delimiter string.
+    wchar_t single[] = L"_";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Multiple character source and single character delimiter string.
+    wchar_t multiple[] = L"1,2";
+    wchar_t *reserve = nullptr;
+    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(token_equals(tok, L"1,2"));
+    // Another call to ensure that 'reserve' is not in a bad state.
+    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(token_equals(tok, L"1,2"));
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+  }
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  wchar_t src[] = L".123";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"123"));
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"123"));
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
+  wchar_t src[] = L"12,34";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"12"));
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"12"));
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  wchar_t src[] = L"1234:";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"1234"));
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"1234"));
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"12"));
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(token_equals(tok, L"12"));
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest,
+     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
+  wchar_t *src = nullptr;
+  wchar_t *reserve = nullptr;
+  // Ensure that instead of crashing if src and reserve are null, nullptr is
+  // returned
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr);
+  // And that neither src nor reserve are changed when that happens
+  ASSERT_EQ(src, nullptr);
+  ASSERT_EQ(reserve, nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest,
+     SubsequentCallsShouldFindFollowingDelimiters) {
+  wchar_t src[] = L"12,34.56";
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
+  ASSERT_TRUE(token_equals(token, L"12"));
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
+  ASSERT_TRUE(token_equals(token, L"34"));
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
+  ASSERT_TRUE(token_equals(token, L"56"));
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+  // Subsequent calls after hitting the end of the string should also return
+  // nullptr.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
+  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
+  ASSERT_TRUE(token_equals(token, L"ab"));
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
+  ASSERT_TRUE(token_equals(token, L"cd"));
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
+  ASSERT_TRUE(token_equals(token, L"ef"));
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+}