Make spread_purelib_into_root behave like the wheel was installed by pip. (#581)

Merge purelib dir into toplevel even if purelib and toplevel have
subdirs with the same name. See tensorflow-io for an example of
a package which was not installed correctly by rules_python before this
change.
diff --git a/python/pip_install/extract_wheels/lib/BUILD b/python/pip_install/extract_wheels/lib/BUILD
index 756fc80..41fd3a6 100644
--- a/python/pip_install/extract_wheels/lib/BUILD
+++ b/python/pip_install/extract_wheels/lib/BUILD
@@ -94,6 +94,17 @@
     ],
 )
 
+py_test(
+    name = "purelib_test",  # unit test for purelib._spread_purelib
+    size = "small",
+    srcs = [
+        "purelib_test.py",
+    ],
+    deps = [
+        ":lib",
+    ],
+)
+
 filegroup(
     name = "distribution",
     srcs = glob(
diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py
index 40eb25d..aa1ed43 100644
--- a/python/pip_install/extract_wheels/lib/purelib.py
+++ b/python/pip_install/extract_wheels/lib/purelib.py
@@ -1,4 +1,5 @@
 """Functions to make purelibs Bazel compatible"""
+import os
 import pathlib
 import shutil
 
@@ -34,6 +35,21 @@
             _spread_purelib(child, wheel_dir)
 
 
+def backport_copytree(src: pathlib.Path, dst: pathlib.Path):
+    """Merge the tree at src into dst; an implementation similar to shutil.copytree.
+
+    shutil.copytree before python3.8 does not allow merging one tree with
+    an existing one. This function does that, while ignoring complications around symlinks, which
+    can't exist in wheels (See https://bugs.python.org/issue27318).
+    """
+    os.makedirs(dst, exist_ok=True)  # dst may already exist when merging
+    for path in src.iterdir():
+        if path.is_dir():
+            backport_copytree(path, pathlib.Path(dst, path.name))
+        elif not pathlib.Path(dst, path.name).exists():  # never overwrite an existing file
+            shutil.copy(path, dst)
+
+
 def _spread_purelib(purelib_dir: pathlib.Path, root_dir: str) -> None:
     """Recursively moves all sibling directories of the purelib to the root.
 
@@ -41,13 +57,11 @@
         purelib_dir: The directory of the purelib.
         root_dir: The directory to move files into.
     """
-    for grandchild in purelib_dir.iterdir():
-        # Some purelib Wheels, like Tensorflow 2.0.0, have directories
-        # split between the root and the purelib directory. In this case
-        # we should leave the purelib 'sibling' alone.
-        # See: https://github.com/dillon-giacoppo/rules_python_external/issues/8
-        if not pathlib.Path(root_dir, grandchild.name).exists():
-            shutil.move(
+    for child in purelib_dir.iterdir():
+        if child.is_dir():
+            backport_copytree(src=child, dst=pathlib.Path(root_dir, child.name))
+        elif not pathlib.Path(root_dir, child.name).exists():  # keep files already in root
+            shutil.copy(
-                src=str(grandchild),
+                src=str(child),
                 dst=root_dir,
             )
diff --git a/python/pip_install/extract_wheels/lib/purelib_test.py b/python/pip_install/extract_wheels/lib/purelib_test.py
new file mode 100644
index 0000000..02fd922
--- /dev/null
+++ b/python/pip_install/extract_wheels/lib/purelib_test.py
@@ -0,0 +1,40 @@
+import os
+import unittest
+from contextlib import contextmanager
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from python.pip_install.extract_wheels.lib import purelib
+
+
+class TestPurelibTestCase(unittest.TestCase):
+    @contextmanager
+    def setup_faux_unzipped_wheel(self):
+        files = [  # (path relative to the temporary directory, file content)
+            ("faux_wheel.data/purelib/toplevel/foo.py", "# foo"),
+            ("faux_wheel.data/purelib/toplevel/dont_overwrite.py", "overwritten"),
+            ("faux_wheel.data/purelib/toplevel/subdir/baz.py", "overwritten"),
+            ("toplevel/bar.py", "# bar"),
+            ("toplevel/dont_overwrite.py", "original"),  # must survive the merge
+        ]
+        with TemporaryDirectory() as td:
+            self.td_path = Path(td)
+            self.purelib_path = self.td_path / Path("faux_wheel.data/purelib")
+            for file_, content in files:
+                path = self.td_path / Path(file_)
+                path.parent.mkdir(parents=True, exist_ok=True)
+                with open(str(path), "w") as f:
+                    f.write(content)
+            yield  # the temporary directory is removed once the caller's with-block exits
+
+    def test_spread_purelib_(self):
+        with self.setup_faux_unzipped_wheel():
+            purelib._spread_purelib(self.purelib_path, self.td_path)
+            self.assertTrue(Path(self.td_path, "toplevel/foo.py").exists())  # merged into existing dir
+            self.assertTrue(Path(self.td_path, "toplevel/subdir/baz.py").exists())  # new subdir created
+            with open(Path(self.td_path, "toplevel/dont_overwrite.py")) as original:
+                self.assertEqual(original.read().strip(), "original")
+
+
+if __name__ == "__main__":
+    unittest.main()