Skip to content

Commit def4cf1

Browse files
committed
[Windows] Fix build issues using Clang-CL on Windows, add CI
1 parent 00d86aa commit def4cf1

File tree

7 files changed

+63
-10
lines changed

7 files changed

+63
-10
lines changed

.github/workflows/pull.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,44 @@ jobs:
3434
3535
# Run tests
3636
pytest
37+
38+
windows:
39+
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
40+
with:
41+
runner: windows.4xlarge
42+
submodules: 'recursive'
43+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
44+
script: |
45+
conda init powershell
46+
powershell -Command "& {
47+
Set-PSDebug -Trace 1
48+
\$ErrorActionPreference = 'Stop'
49+
\$PSNativeCommandUseErrorActionPreference = \$true
50+
51+
cmake -DCMAKE_BUILD_TYPE=Debug test -Bbuild/test -T ClangCL
52+
cmake --build build/test -j9 --config Debug
53+
if (\$LASTEXITCODE -ne 0) {
54+
Write-Host "Build was not successful. Exit code: \$LASTEXITCODE."
55+
exit \$LASTEXITCODE
56+
}
57+
58+
Push-Location build/test
59+
ctest
60+
if (\$LASTEXITCODE -ne 0) {
61+
Write-Host "Unit tests were not successful. Exit code: \$LASTEXITCODE."
62+
exit \$LASTEXITCODE
63+
}
64+
Pop-Location
65+
66+
# Install tokenizers
67+
pip install . -v
68+
pip install pytest blobfile transformers>=4.53.1
69+
70+
# Run tests
71+
pytest
72+
if (\$LASTEXITCODE -ne 0) {
73+
Write-Host "Python tests were not successful. Exit code: \$LASTEXITCODE."
74+
Start-Sleep -Seconds 600 # Debug - keep alive to give time to SSH
75+
exit \$LASTEXITCODE
76+
}
77+
}"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,7 @@ pip-out/
3434
*~
3535
.~lock.*
3636
*.idea
37+
38+
*.so
39+
*.dylib
40+
*.pyd

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ include(CMakePackageConfigHelpers)
2828
include(Utils.cmake)
2929

3030
# Ignore weak attribute warning
31-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
31+
if(NOT MSVC)
32+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
33+
endif()
3234

3335
set(ABSL_ENABLE_INSTALL ON)
3436
set(ABSL_PROPAGATE_CXX_STD ON)

include/pytorch/tokenizers/tiktoken.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
#include <pytorch/tokenizers/result.h>
2424
#include <pytorch/tokenizers/tokenizer.h>
2525

26+
#ifdef _WIN32
27+
// ssize_t isn't available on Windows. Alias it to the Windows SSIZE_T type.
28+
#include <BaseTsd.h>
29+
typedef SSIZE_T ssize_t;
30+
#endif
31+
2632
namespace tokenizers {
2733

2834
static constexpr int32_t kSpecialTokensSize = 256;

setup.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ class CMakeBuild(build_ext):
3030
def build_extension(self, ext): # noqa C901
3131
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
3232

33-
# Ensure the extension goes into the pytorch_tokenizers package directory
34-
extdir = os.path.join(extdir, "pytorch_tokenizers")
35-
3633
# Required for auto-detection & inclusion of auxiliary "native" libs
3734
if not extdir.endswith(os.path.sep):
3835
extdir += os.path.sep
@@ -55,6 +52,10 @@ def build_extension(self, ext): # noqa C901
5552
]
5653
build_args = ["--target", "pytorch_tokenizers_cpp"]
5754

55+
# Use Clang for Windows builds.
56+
if sys.platform == "win32":
57+
cmake_args += ["-T ClangCL"]
58+
5859
# Adding CMake arguments set as environment variable
5960
# (needed e.g. to build for ARM OSX on conda-forge)
6061
if "CMAKE_ARGS" in os.environ:
@@ -124,15 +125,14 @@ def build_extension(self, ext): # noqa C901
124125
["cmake", "--build", "."] + build_args, cwd=build_temp, check=True
125126
)
126127

127-
128128
setup(
129129
name="pytorch-tokenizers",
130130
version="0.1.0",
131131
long_description=long_description,
132132
long_description_content_type="text/markdown",
133133
url="https://github.com/meta-pytorch/tokenizers",
134134
packages=find_packages(),
135-
ext_modules=[CMakeExtension("pytorch_tokenizers_cpp")],
135+
ext_modules=[CMakeExtension("pytorch_tokenizers.pytorch_tokenizers_cpp")],
136136
cmdclass={"build_ext": CMakeBuild},
137137
zip_safe=False,
138138
python_requires=">=3.10",

src/hf_tokenizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ Error HFTokenizer::load(const std::string& path) {
3434
std::string model_config_json = "";
3535
if (fs::is_directory(path)) {
3636
const fs::path root(path);
37-
model_json = root / "tokenizer.json";
37+
model_json = (root / "tokenizer.json").string();
3838
if (!fs::exists(model_json)) {
3939
TK_LOG(Info, "no tokenizer.json found in %s", path.c_str());
4040
return Error::LoadFailure;
4141
}
4242
const auto model_config_json_path = root / "tokenizer_config.json";
4343
if (fs::exists(model_config_json_path)) {
44-
model_config_json = model_config_json_path;
44+
model_config_json = model_config_json_path.string();
4545
}
4646
}
4747

test/test_tiktoken.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ TEST_F(TiktokenTest, TokenizerDecodeOutOfRangeFails) {
117117
TEST_F(TiktokenTest, ConstructionWithInvalidBOSIndex) {
118118
// gtest death test doesn't work on iOS (the test is also skipped on Windows):
119119
// https://github.com/google/googletest/issues/2834
120-
#if !GTEST_OS_IOS
120+
#if !GTEST_OS_IOS && !GTEST_OS_WINDOWS
121121
EXPECT_EXIT(
122122
std::make_unique<Tiktoken>(
123123
std::make_unique<std::vector<std::string>>(
@@ -132,7 +132,7 @@ TEST_F(TiktokenTest, ConstructionWithInvalidBOSIndex) {
132132
TEST_F(TiktokenTest, ConstructionWithInvalidEOSIndex) {
133133
// gtest death test doesn't work on iOS (the test is also skipped on Windows):
134134
// https://github.com/google/googletest/issues/2834
135-
#if !GTEST_OS_IOS
135+
#if !GTEST_OS_IOS && !GTEST_OS_WINDOWS
136136
EXPECT_EXIT(
137137
std::make_unique<Tiktoken>(
138138
std::make_unique<std::vector<std::string>>(

0 commit comments

Comments
 (0)