Use `installer` to remove custom unzip and spread code (#715)

diff --git a/examples/pip_install/pip_install_test.py b/examples/pip_install/pip_install_test.py
index f9a62ca..eb4d7d8 100644
--- a/examples/pip_install/pip_install_test.py
+++ b/examples/pip_install/pip_install_test.py
@@ -37,12 +37,11 @@
         self.assertListEqual(
             env.split(" "),
             [
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/INSTALL.md",
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/LICENSE",
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/NEWS",
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/README.md",
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/man/man1/s3cmd.1",
-                "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/scripts/s3cmd",
+                "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/INSTALL.md",
+                "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/LICENSE",
+                "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/NEWS",
+                "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/README.md",
+                "external/pip/pypi__s3cmd/data/share/man/man1/s3cmd.1",
             ],
         )
 
@@ -52,12 +51,13 @@
         self.assertListEqual(
             env.split(" "),
             [
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/DESCRIPTION.rst",
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/METADATA",
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/RECORD",
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/WHEEL",
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/metadata.json",
-                "external/pip/pypi__boto3/boto3-1.14.51.dist-info/top_level.txt",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/DESCRIPTION.rst",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/INSTALLER",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/METADATA",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/RECORD",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/WHEEL",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/metadata.json",
+                "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/top_level.txt",
             ],
         )
 
diff --git a/examples/pip_parse/pip_parse_test.py b/examples/pip_parse/pip_parse_test.py
index ef684c4..030e38c 100644
--- a/examples/pip_parse/pip_parse_test.py
+++ b/examples/pip_parse/pip_parse_test.py
@@ -35,12 +35,11 @@
         self.assertListEqual(
             env.split(" "),
             [
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/INSTALL.md",
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/LICENSE",
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/NEWS",
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/README.md",
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/man/man1/s3cmd.1",
-                "external/pypi_s3cmd/s3cmd-2.1.0.data/scripts/s3cmd",
+                "external/pypi_s3cmd/data/share/doc/packages/s3cmd/INSTALL.md",
+                "external/pypi_s3cmd/data/share/doc/packages/s3cmd/LICENSE",
+                "external/pypi_s3cmd/data/share/doc/packages/s3cmd/NEWS",
+                "external/pypi_s3cmd/data/share/doc/packages/s3cmd/README.md",
+                "external/pypi_s3cmd/data/share/man/man1/s3cmd.1",
             ],
         )
 
@@ -50,11 +49,12 @@
         self.assertListEqual(
             env.split(" "),
             [
-                "external/pypi_requests/requests-2.25.1.dist-info/LICENSE",
-                "external/pypi_requests/requests-2.25.1.dist-info/METADATA",
-                "external/pypi_requests/requests-2.25.1.dist-info/RECORD",
-                "external/pypi_requests/requests-2.25.1.dist-info/WHEEL",
-                "external/pypi_requests/requests-2.25.1.dist-info/top_level.txt",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/INSTALLER",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/LICENSE",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/METADATA",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/RECORD",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/WHEEL",
+                "external/pypi_requests/site-packages/requests-2.25.1.dist-info/top_level.txt",
             ],
         )
 
diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE
index eb712cf..8ee885d 100644
--- a/examples/pip_repository_annotations/WORKSPACE
+++ b/examples/pip_repository_annotations/WORKSPACE
@@ -42,7 +42,7 @@
         copy_executables = {"@pip_repository_annotations_example//:data/copy_executable.py": "copied_content/executable.py"},
         copy_files = {"@pip_repository_annotations_example//:data/copy_file.txt": "copied_content/file.txt"},
         data = [":generated_file"],
-        data_exclude_glob = ["*.dist-info/WHEEL"],
+        data_exclude_glob = ["site-packages/*.dist-info/WHEEL"],
     ),
 }
 
diff --git a/examples/pip_repository_annotations/pip_repository_annotations_test.py b/examples/pip_repository_annotations/pip_repository_annotations_test.py
index 79c354d..468788f 100644
--- a/examples/pip_repository_annotations/pip_repository_annotations_test.py
+++ b/examples/pip_repository_annotations/pip_repository_annotations_test.py
@@ -69,7 +69,7 @@
 
         r = runfiles.Create()
         dist_info_dir = (
-            "pip_repository_annotations_example/external/{}/wheel-{}.dist-info".format(
+            "pip_repository_annotations_example/external/{}/site-packages/wheel-{}.dist-info".format(
                 self.wheel_pkg_dir(),
                 current_wheel_version,
             )
diff --git a/python/pip_install/extract_wheels/lib/BUILD b/python/pip_install/extract_wheels/lib/BUILD
index 4821412..31d6bb8 100644
--- a/python/pip_install/extract_wheels/lib/BUILD
+++ b/python/pip_install/extract_wheels/lib/BUILD
@@ -9,7 +9,6 @@
         "arguments.py",
         "bazel.py",
         "namespace_pkgs.py",
-        "purelib.py",
         "requirements.py",
         "wheel.py",
     ],
@@ -135,17 +134,6 @@
     ],
 )
 
-py_test(
-    name = "purelib_test",
-    size = "small",
-    srcs = [
-        "purelib_test.py",
-    ],
-    deps = [
-        ":lib",
-    ],
-)
-
 filegroup(
     name = "distribution",
     srcs = glob(
diff --git a/python/pip_install/extract_wheels/lib/bazel.py b/python/pip_install/extract_wheels/lib/bazel.py
index aaca68b..da1e52c 100644
--- a/python/pip_install/extract_wheels/lib/bazel.py
+++ b/python/pip_install/extract_wheels/lib/bazel.py
@@ -9,7 +9,6 @@
 from python.pip_install.extract_wheels.lib import (
     annotation,
     namespace_pkgs,
-    purelib,
     wheel,
 )
 
@@ -137,27 +136,18 @@
     there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`).
     """
 
-    dist_info_ignores = [
-        # RECORD is known to contain sha256 checksums of files which might include the checksums
-        # of generated files produced when wheels are installed. The file is ignored to avoid
-        # Bazel caching issues.
-        "**/*.dist-info/RECORD",
-    ]
-
     data_exclude = list(
         set(
             [
-                "*.whl",
-                "**/__pycache__/**",
                 "**/* *",
                 "**/*.py",
                 "**/*.pyc",
-                "BUILD.bazel",
-                "WORKSPACE",
-                f"{WHEEL_ENTRY_POINT_PREFIX}*.py",
+                # RECORD is known to contain sha256 checksums of files which might include the checksums
+                # of generated files produced when wheels are installed. The file is ignored to avoid
+                # Bazel caching issues.
+                "**/*.dist-info/RECORD",
             ]
             + data_exclude
-            + dist_info_ignores
         )
     )
 
@@ -172,12 +162,12 @@
 
         filegroup(
             name = "{dist_info_label}",
-            srcs = glob(["*.dist-info/**"], allow_empty = True),
+            srcs = glob(["site-packages/*.dist-info/**"], allow_empty = True),
         )
 
         filegroup(
             name = "{data_label}",
-            srcs = glob(["*.data/**"], allow_empty = True),
+            srcs = glob(["data/**"], allow_empty = True),
         )
 
         filegroup(
@@ -188,11 +178,11 @@
 
         py_library(
             name = "{name}",
-            srcs = glob(["**/*.py"], exclude={srcs_exclude}, allow_empty = True),
-            data = {data} + glob(["**/*"], exclude={data_exclude}),
+            srcs = glob(["site-packages/**/*.py"], exclude={srcs_exclude}, allow_empty = True),
+            data = {data} + glob(["site-packages/**/*"], exclude={data_exclude}),
             # This makes this directory a top-level in the python import
             # search path for anything that depends on this.
-            imports = ["."],
+            imports = ["site-packages"],
             deps = [{dependencies}],
             tags = [{tags}],
         )
@@ -377,9 +367,6 @@
         shutil.copy(whl.path, directory)
     whl.unzip(directory)
 
-    # Note: Order of operations matters here
-    purelib.spread_purelib_into_root(directory)
-
     if not enable_implicit_namespace_pkgs:
         setup_namespace_pkg_compatibility(directory)
 
diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py
deleted file mode 100644
index 978e0f1..0000000
--- a/python/pip_install/extract_wheels/lib/purelib.py
+++ /dev/null
@@ -1,67 +0,0 @@
-"""Functions to make purelibs Bazel compatible"""
-import os
-import pathlib
-import shutil
-
-from python.pip_install.extract_wheels.lib import wheel
-
-
-def spread_purelib_into_root(wheel_dir: str) -> None:
-    """Unpacks purelib directories into the root.
-
-    Args:
-         wheel_dir: The root of the extracted wheel directory.
-    """
-    dist_info = wheel.get_dist_info(wheel_dir)
-    wheel_metadata_file_path = pathlib.Path(dist_info, "WHEEL")
-    wheel_metadata_dict = wheel.parse_wheel_meta_file(str(wheel_metadata_file_path))
-
-    # It is not guaranteed that a WHEEL file author populates 'Root-Is-Purelib'.
-    # See: https://github.com/bazelbuild/rules_python/issues/435
-    root_is_purelib: str = wheel_metadata_dict.get("Root-Is-Purelib", "")
-    if root_is_purelib.lower() == "true":
-        # The Python package code is in the root of the Wheel, so no need to 'spread' anything.
-        return
-
-    dot_data_dir = wheel.get_dot_data_directory(wheel_dir)
-    # 'Root-Is-Purelib: false' is no guarantee a .data directory exists with
-    # package code in it. eg. the 'markupsafe' package.
-    if not dot_data_dir:
-        return
-
-    for child in pathlib.Path(dot_data_dir).iterdir():
-        # TODO(Jonathon): Should all other potential folders get ignored? eg. 'platlib'
-        if str(child).endswith("purelib"):
-            _spread_purelib(child, wheel_dir)
-
-
-def backport_copytree(src: pathlib.Path, dst: pathlib.Path):
-    """Implementation similar to shutil.copytree.
-
-    shutil.copytree before python3.8 does not allow merging one tree with
-    an existing one. This function does that, while ignoring complications around symlinks, which
-    can't exist is wheels (See https://bugs.python.org/issue27318).
-    """
-    os.makedirs(dst, exist_ok=True)
-    for path in src.iterdir():
-        if path.is_dir():
-            backport_copytree(path, pathlib.Path(dst, path.name))
-        elif not pathlib.Path(dst, path.name).exists():
-            shutil.copy(path, dst)
-
-
-def _spread_purelib(purelib_dir: pathlib.Path, root_dir: str) -> None:
-    """Recursively moves all sibling directories of the purelib to the root.
-
-    Args:
-        purelib_dir: The directory of the purelib.
-        root_dir: The directory to move files into.
-    """
-    for child in purelib_dir.iterdir():
-        if child.is_dir():
-            backport_copytree(src=child, dst=pathlib.Path(root_dir, child.name))
-        elif not pathlib.Path(root_dir, child.name).exists():
-            shutil.copy(
-                src=str(child),
-                dst=root_dir,
-            )
diff --git a/python/pip_install/extract_wheels/lib/purelib_test.py b/python/pip_install/extract_wheels/lib/purelib_test.py
deleted file mode 100644
index 02fd922..0000000
--- a/python/pip_install/extract_wheels/lib/purelib_test.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-import unittest
-from contextlib import contextmanager
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
-from python.pip_install.extract_wheels.lib import purelib
-
-
-class TestPurelibTestCase(unittest.TestCase):
-    @contextmanager
-    def setup_faux_unzipped_wheel(self):
-        files = [
-            ("faux_wheel.data/purelib/toplevel/foo.py", "# foo"),
-            ("faux_wheel.data/purelib/toplevel/dont_overwrite.py", "overwritten"),
-            ("faux_wheel.data/purelib/toplevel/subdir/baz.py", "overwritten"),
-            ("toplevel/bar.py", "# bar"),
-            ("toplevel/dont_overwrite.py", "original"),
-        ]
-        with TemporaryDirectory() as td:
-            self.td_path = Path(td)
-            self.purelib_path = self.td_path / Path("faux_wheel.data/purelib")
-            for file_, content in files:
-                path = self.td_path / Path(file_)
-                path.parent.mkdir(parents=True, exist_ok=True)
-                with open(str(path), "w") as f:
-                    f.write(content)
-            yield
-
-    def test_spread_purelib_(self):
-        with self.setup_faux_unzipped_wheel():
-            purelib._spread_purelib(self.purelib_path, self.td_path)
-            self.assertTrue(Path(self.td_path, "toplevel/foo.py").exists())
-            self.assertTrue(Path(self.td_path, "toplevel/subdir/baz.py").exists())
-            with open(Path(self.td_path, "toplevel/dont_overwrite.py")) as original:
-                self.assertEqual(original.read().strip(), "original")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/python/pip_install/extract_wheels/lib/wheel.py b/python/pip_install/extract_wheels/lib/wheel.py
index 6dab311..3f10100 100644
--- a/python/pip_install/extract_wheels/lib/wheel.py
+++ b/python/pip_install/extract_wheels/lib/wheel.py
@@ -1,9 +1,5 @@
 """Utility class to inspect an extracted wheel directory"""
 import email
-import glob
-import os
-import stat
-import zipfile
 from typing import Dict, Optional, Set, Tuple
 
 import installer
@@ -11,21 +7,6 @@
 from pip._vendor.packaging.utils import canonicalize_name
 
 
-def current_umask() -> int:
-    """Get the current umask which involves having to set it temporarily."""
-    mask = os.umask(0)
-    os.umask(mask)
-    return mask
-
-
-def set_extracted_file_to_default_mode_plus_executable(path: str) -> None:
-    """
-    Make file present at path have execute for user/group/world
-    (chmod +x) is no-op on windows per python docs
-    """
-    os.chmod(path, (0o777 & ~current_umask() | 0o111))
-
-
 class Wheel:
     """Representation of the compressed .whl file"""
 
@@ -90,91 +71,26 @@
         return dependency_set
 
     def unzip(self, directory: str) -> None:
-        with zipfile.ZipFile(self.path, "r") as whl:
-            whl.extractall(directory)
-            # The following logic is borrowed from Pip:
-            # https://github.com/pypa/pip/blob/cc48c07b64f338ac5e347d90f6cb4efc22ed0d0b/src/pip/_internal/utils/unpacking.py#L240
-            for info in whl.infolist():
-                name = info.filename
-                # Do not attempt to modify directories.
-                if name.endswith("/") or name.endswith("\\"):
-                    continue
-                mode = info.external_attr >> 16
-                # if mode and regular file and any execute permissions for
-                # user/group/world?
-                if mode and stat.S_ISREG(mode) and mode & 0o111:
-                    name = os.path.join(directory, name)
-                    set_extracted_file_to_default_mode_plus_executable(name)
-
-
-def get_dist_info(wheel_dir: str) -> str:
-    """ "Returns the relative path to the dist-info directory if it exists.
-
-    Args:
-         wheel_dir: The root of the extracted wheel directory.
-
-    Returns:
-        Relative path to the dist-info directory if it exists, else, None.
-    """
-    dist_info_dirs = glob.glob(os.path.join(wheel_dir, "*.dist-info"))
-    if not dist_info_dirs:
-        raise ValueError(
-            "No *.dist-info directory found. %s is not a valid Wheel." % wheel_dir
+        installation_schemes = {
+            "purelib": "/site-packages",
+            "platlib": "/site-packages",
+            "headers": "/include",
+            "scripts": "/bin",
+            "data": "/data",
+        }
+        destination = installer.destinations.SchemeDictionaryDestination(
+            installation_schemes,
+            # TODO Should entry_point scripts also be handled by installer rather than custom code?
+            interpreter="/dev/null",
+            script_kind="posix",
+            destdir=directory,
         )
 
-    if len(dist_info_dirs) > 1:
-        raise ValueError(
-            "Found more than 1 *.dist-info directory. %s is not a valid Wheel."
-            % wheel_dir
-        )
-
-    return dist_info_dirs[0]
-
-
-def get_dot_data_directory(wheel_dir: str) -> Optional[str]:
-    """Returns the relative path to the data directory if it exists.
-
-    See: https://www.python.org/dev/peps/pep-0491/#the-data-directory
-
-    Args:
-         wheel_dir: The root of the extracted wheel directory.
-
-    Returns:
-        Relative path to the data directory if it exists, else, None.
-    """
-
-    dot_data_dirs = glob.glob(os.path.join(wheel_dir, "*.data"))
-    if not dot_data_dirs:
-        return None
-
-    if len(dot_data_dirs) > 1:
-        raise ValueError(
-            "Found more than 1 *.data directory. %s is not a valid Wheel." % wheel_dir
-        )
-
-    return dot_data_dirs[0]
-
-
-def parse_wheel_meta_file(wheel_dir: str) -> Dict[str, str]:
-    """Parses the given WHEEL file into a dictionary.
-
-    Args:
-         wheel_dir: The file path of the WHEEL metadata file in dist-info.
-
-    Returns:
-        The WHEEL file mapped into a dictionary.
-    """
-    contents = {}
-    with open(wheel_dir, "r") as wheel_file:
-        for line in wheel_file:
-            cleaned = line.strip()
-            if not cleaned:
-                continue
-            try:
-                key, value = cleaned.split(":", maxsplit=1)
-                contents[key] = value.strip()
-            except ValueError:
-                raise RuntimeError(
-                    "Encounted invalid line in WHEEL file: '%s'" % cleaned
-                )
-    return contents
+        with installer.sources.WheelFile.open(self.path) as wheel_source:
+            installer.install(
+                source=wheel_source,
+                destination=destination,
+                additional_metadata={
+                    "INSTALLER": b"https://github.com/bazelbuild/rules_python",
+                },
+            )
diff --git a/python/pip_install/private/srcs.bzl b/python/pip_install/private/srcs.bzl
index 3f20c45..a253b66 100644
--- a/python/pip_install/private/srcs.bzl
+++ b/python/pip_install/private/srcs.bzl
@@ -14,7 +14,6 @@
     "@rules_python//python/pip_install/extract_wheels/lib:arguments.py",
     "@rules_python//python/pip_install/extract_wheels/lib:bazel.py",
     "@rules_python//python/pip_install/extract_wheels/lib:namespace_pkgs.py",
-    "@rules_python//python/pip_install/extract_wheels/lib:purelib.py",
     "@rules_python//python/pip_install/extract_wheels/lib:requirements.py",
     "@rules_python//python/pip_install/extract_wheels/lib:wheel.py",
     "@rules_python//python/pip_install/parse_requirements_to_bzl:__init__.py",