Make spread_purelib_into_root behave as if the wheel was installed by pip (#581)
Merge purelib dir into toplevel even if purelib and toplevel have
subdirs with the same name. See tensorflow-io for an example of
a package which was not installed correctly by rules_python before this
change.
diff --git a/python/pip_install/extract_wheels/lib/BUILD b/python/pip_install/extract_wheels/lib/BUILD
index 756fc80..41fd3a6 100644
--- a/python/pip_install/extract_wheels/lib/BUILD
+++ b/python/pip_install/extract_wheels/lib/BUILD
@@ -94,6 +94,17 @@
],
)
+# Unit test target that runs purelib_test.py against the :lib target.
+py_test(
+ name = "purelib_test",
+ size = "small",
+ srcs = [
+ "purelib_test.py",
+ ],
+ deps = [
+ ":lib",
+ ],
+)
+
filegroup(
name = "distribution",
srcs = glob(
diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py
index 40eb25d..aa1ed43 100644
--- a/python/pip_install/extract_wheels/lib/purelib.py
+++ b/python/pip_install/extract_wheels/lib/purelib.py
@@ -1,4 +1,5 @@
"""Functions to make purelibs Bazel compatible"""
+import os
import pathlib
import shutil
@@ -34,6 +35,21 @@
_spread_purelib(child, wheel_dir)
+def backport_copytree(src: pathlib.Path, dst: pathlib.Path) -> None:
+ """Recursively merge the tree at src into dst without overwriting files.
+
+ shutil.copytree before python3.8 does not allow merging one tree with
+ an existing one. This function does that, while ignoring complications
+ around symlinks, which can't exist in wheels
+ (See https://bugs.python.org/issue27318).
+
+ Args:
+ src: The directory whose contents are copied.
+ dst: The directory to copy into. It is created if missing; entries
+ that already exist in it are left untouched.
+ """
+ # Create the destination (and any missing parents); an existing one is fine.
+ os.makedirs(dst, exist_ok=True)
+ for path in src.iterdir():
+ if path.is_dir():
+ # Recurse so that same-named subdirectories are merged, not skipped.
+ backport_copytree(path, pathlib.Path(dst, path.name))
+ elif not pathlib.Path(dst, path.name).exists():
+ # Existing destination entries win; only copy what is missing.
+ shutil.copy(path, dst)
+
+
def _spread_purelib(purelib_dir: pathlib.Path, root_dir: str) -> None:
"""Recursively moves all sibling directories of the purelib to the root.
@@ -41,13 +57,13 @@
purelib_dir: The directory of the purelib.
root_dir: The directory to move files into.
"""
- for grandchild in purelib_dir.iterdir():
- # Some purelib Wheels, like Tensorflow 2.0.0, have directories
- # split between the root and the purelib directory. In this case
- # we should leave the purelib 'sibling' alone.
- # See: https://github.com/dillon-giacoppo/rules_python_external/issues/8
- if not pathlib.Path(root_dir, grandchild.name).exists():
- shutil.move(
- src=str(grandchild),
- dst=root_dir,
- )
+ for child in purelib_dir.iterdir():
+ if child.is_dir():
+ # Merge into the root even when a same-named directory already exists there.
+ backport_copytree(src=child, dst=pathlib.Path(root_dir, child.name))
+ elif not pathlib.Path(root_dir, child.name).exists():
+ # Plain files are only copied when the root has no entry of that name.
+ shutil.copy(
+ src=str(child),
+ dst=root_dir,
+ )
diff --git a/python/pip_install/extract_wheels/lib/purelib_test.py b/python/pip_install/extract_wheels/lib/purelib_test.py
new file mode 100644
index 0000000..02fd922
--- /dev/null
+++ b/python/pip_install/extract_wheels/lib/purelib_test.py
@@ -0,0 +1,40 @@
+import os
+import unittest
+from contextlib import contextmanager
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from python.pip_install.extract_wheels.lib import purelib
+
+
+class TestPurelibTestCase(unittest.TestCase):
+ """Exercises purelib._spread_purelib against a fake unzipped wheel."""
+ @contextmanager
+ def setup_faux_unzipped_wheel(self):
+ """Build a fake unzipped wheel in a temporary directory.
+
+ While the context is active, self.td_path is the temporary root and
+ self.purelib_path is its faux_wheel.data/purelib subdirectory.
+ """
+ # (relative path, file content) pairs. dont_overwrite.py exists both
+ # under purelib and at the top level, with different content.
+ files = [
+ ("faux_wheel.data/purelib/toplevel/foo.py", "# foo"),
+ ("faux_wheel.data/purelib/toplevel/dont_overwrite.py", "overwritten"),
+ ("faux_wheel.data/purelib/toplevel/subdir/baz.py", "overwritten"),
+ ("toplevel/bar.py", "# bar"),
+ ("toplevel/dont_overwrite.py", "original"),
+ ]
+ with TemporaryDirectory() as td:
+ self.td_path = Path(td)
+ self.purelib_path = self.td_path / Path("faux_wheel.data/purelib")
+ for file_, content in files:
+ path = self.td_path / Path(file_)
+ # Create intermediate directories before writing the file.
+ path.parent.mkdir(parents=True, exist_ok=True)
+ with open(str(path), "w") as f:
+ f.write(content)
+ yield
+
+ def test_spread_purelib_(self):
+ with self.setup_faux_unzipped_wheel():
+ purelib._spread_purelib(self.purelib_path, self.td_path)
+ # Files from purelib, including one in a new subdirectory, land under the root.
+ self.assertTrue(Path(self.td_path, "toplevel/foo.py").exists())
+ self.assertTrue(Path(self.td_path, "toplevel/subdir/baz.py").exists())
+ # A file already present at the top level keeps its original content.
+ with open(Path(self.td_path, "toplevel/dont_overwrite.py")) as original:
+ self.assertEqual(original.read().strip(), "original")
+
+
+if __name__ == "__main__":
+ unittest.main()