-
Notifications
You must be signed in to change notification settings - Fork 359
Expand file tree
/
Copy pathhatch_build.py
More file actions
99 lines (80 loc) · 3.47 KB
/
hatch_build.py
File metadata and controls
99 lines (80 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Hatchling build hook for compiling C++ extensions."""
import subprocess
import sys
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
class CustomBuildHook(BuildHookInterface):
"""Custom build hook to compile C++ extensions."""
def initialize(self, version, build_data):
"""Compile C++ extensions before building the wheel."""
if self.target_name not in ("wheel", "sdist"):
return
# Only compile for wheel builds; skip on macOS (C++/OpenMP toolchain issues)
if self.target_name == "wheel" and sys.platform != "darwin":
self._build_extensions()
def _build_extensions(self):
"""Build C++ extensions using setuptools."""
import pybind11
from Cython.Build import cythonize
from setuptools import Extension
from setuptools.command.build_ext import build_ext
class BuildExt(build_ext):
"""Custom build_ext to add compiler-specific options."""
def build_extensions(self):
ct = self.compiler.compiler_type
opts = []
if ct == "unix":
opts.append("-std=c++11")
opts.append("-fvisibility=hidden")
# On macOS, ensure clang finds C++ stdlib (fixes 'cstddef' not found)
if sys.platform == "darwin":
try:
sdk = subprocess.run(
["xcrun", "--show-sdk-path"],
capture_output=True,
text=True,
check=True,
)
if sdk.stdout.strip():
opts.append("-isysroot")
opts.append(sdk.stdout.strip())
except (subprocess.CalledProcessError, FileNotFoundError):
pass
elif ct == "msvc":
opts.append("/std:c++11")
for ext in self.extensions:
if ext.language == "c++":
ext.extra_compile_args = opts
build_ext.build_extensions(self)
# Define extensions
ext_modules = [
Extension(
"data_juicer.ops.deduplicator.minhash",
sources=["data_juicer/ops/deduplicator/minhash.cpp"],
include_dirs=[pybind11.get_include()],
extra_compile_args=["-fopenmp", "-O3"],
extra_link_args=["-fopenmp"],
language="c++",
),
Extension(
"data_juicer.ops.deduplicator.tokenize",
sources=["data_juicer/ops/deduplicator/tokenize.pyx"],
extra_compile_args=["-O3"],
),
]
# Cythonize
ext_modules = cythonize(
ext_modules,
compiler_directives={
"language_level": "3",
"embedsignature": True,
},
)
# Build extensions
from setuptools.dist import Distribution
dist = Distribution(attrs={"ext_modules": ext_modules, "cmdclass": {"build_ext": BuildExt}})
build_ext_cmd = dist.get_command_obj("build_ext")
build_ext_cmd.ensure_finalized()
# Set build directory to the package directory
build_ext_cmd.inplace = 1
build_ext_cmd.run()
print("C++ extensions built successfully!")