refactor: make modules_mapping a regular rule (#578)

* refactor: make modules_mapping a regular rule

* fix: remove unnecessary comment

Signed-off-by: Thulio Ferraz Assis <3149049+f0rmiga@users.noreply.github.com>

Co-authored-by: Alex Eagle <eagle@post.harvard.edu>
diff --git a/examples/build_file_generation/BUILD b/examples/build_file_generation/BUILD
index ec31255..3e6d44a 100644
--- a/examples/build_file_generation/BUILD
+++ b/examples/build_file_generation/BUILD
@@ -1,8 +1,17 @@
 load("@bazel_gazelle//:def.bzl", "gazelle")
+load("@pip//:requirements.bzl", "all_whl_requirements")
 load("@rules_python//gazelle:def.bzl", "GAZELLE_PYTHON_RUNTIME_DEPS")
 load("@rules_python//gazelle/manifest:defs.bzl", "gazelle_python_manifest")
+load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
 load("@rules_python//python:defs.bzl", "py_library")
 
+# This rule extracts the metadata for the Python packages we depend on. That
+# data is required for the gazelle_python_manifest rule to update our manifest
+# file.
+modules_mapping(
+    name = "modules_map",
+    wheels = all_whl_requirements,
+)
+
 # Gazelle python extension needs a manifest file mapping from
 # an import to the installed package that provides it.
 # This macro produces two targets:
@@ -12,7 +21,7 @@
 #   the manifest doesn't need to be updated
 gazelle_python_manifest(
     name = "gazelle_python_manifest",
-    modules_mapping = "@modules_map//:modules_mapping.json",
+    modules_mapping = ":modules_map",
     pip_deps_repository_name = "pip",
     requirements = "//:requirements_lock.txt",
 )
diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE
index 4255932..c58b50f 100644
--- a/examples/build_file_generation/WORKSPACE
+++ b/examples/build_file_generation/WORKSPACE
@@ -60,14 +60,3 @@
 load("@rules_python//gazelle:deps.bzl", _py_gazelle_deps = "gazelle_deps")
 
 _py_gazelle_deps()
-
-load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
-
-# This repository rule fetches the metadata for python packages we
-# depend on. That data is required for the gazelle_python_manifest
-# rule to update our manifest file.
-# To see what this rule does, try `bazel run @modules_map//:print`
-modules_mapping(
-    name = "modules_map",
-    requirements = "//:requirements_lock.txt",
-)
diff --git a/gazelle/README.md b/gazelle/README.md
index 9edf773..2058458 100644
--- a/gazelle/README.md
+++ b/gazelle/README.md
@@ -9,11 +9,8 @@
 First, you'll need to add Gazelle to your `WORKSPACE` file.
 Follow the instructions at https://github.com/bazelbuild/bazel-gazelle#running-gazelle-with-bazel
 
-Next, we need to add two more things to the `WORKSPACE`:
-
-1. fetch the third-party Go libraries that the python extension depends on
-1. fetch metadata about your Python dependencies, so that gazelle can
-   determine which package a given import statement comes from.
+Next, we need to fetch the third-party Go libraries that the python extension
+depends on.
 
 Add this to your `WORKSPACE`:
 
@@ -23,22 +20,12 @@
 load("@rules_python//gazelle:deps.bzl", _py_gazelle_deps = "gazelle_deps")
 
 _py_gazelle_deps()
-
-load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
-
-# This repository rule fetches the metadata for python packages we
-# depend on. That data is required for the gazelle_python_manifest
-# rule to update our manifest file.
-# To see what this rule does, try `bazel run @modules_map//:print`
-modules_mapping(
-    name = "modules_map",
-    # This should point to wherever we declare our python dependencies
-    requirements = "//:requirements_lock.txt",
-)
 ```
 
-Next, we'll make a pair of targets for consuming that `modules_mapping` we
-fetched, and writing it as a manifest file for Gazelle to read.
+Next, we'll fetch metadata about your Python dependencies, so that gazelle can
+determine which package a given import statement comes from. This is provided
+by the `modules_mapping` rule. We'll make a target for consuming this
+`modules_mapping`, and writing it as a manifest file for Gazelle to read.
 This is checked into the repo for speed, as it takes some time to calculate
 in a large monorepo.
 
@@ -48,7 +35,16 @@
 Then put this in your `BUILD.bazel` file next to the `requirements.txt`:
 
 ```starlark
+load("@pip//:requirements.bzl", "all_whl_requirements")
 load("@rules_python//gazelle/manifest:defs.bzl", "gazelle_python_manifest")
+load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
+
+# This rule extracts the metadata for the Python packages we depend on. That
+# data is required for the gazelle_python_manifest rule to update our manifest
+# file.
+modules_mapping(
+    name = "modules_map",
+    wheels = all_whl_requirements,
+)
 
 # Gazelle python extension needs a manifest file mapping from
 # an import to the installed package that provides it.
@@ -59,9 +55,7 @@
 #   the manifest doesn't need to be updated
 gazelle_python_manifest(
     name = "gazelle_python_manifest",
-    # The @modules_map refers to the name we gave in the modules_mapping
-    # rule in the WORKSPACE
-    modules_mapping = "@modules_map//:modules_mapping.json",
+    modules_mapping = ":modules_map",
     # This is what we called our `pip_install` rule, where third-party
     # python libraries are loaded in BUILD files.
     pip_deps_repository_name = "pip",
diff --git a/gazelle/modules_mapping/BUILD.bazel b/gazelle/modules_mapping/BUILD.bazel
index 4ce6a00..d1cd42e 100644
--- a/gazelle/modules_mapping/BUILD.bazel
+++ b/gazelle/modules_mapping/BUILD.bazel
@@ -1,4 +1,7 @@
-exports_files([
-    "builder.py",
-    "generator.py",
-])
+load("@rules_python//python:defs.bzl", "py_binary")
+
+py_binary(
+    name = "generator",
+    srcs = ["generator.py"],
+    visibility = ["//visibility:public"],
+)
diff --git a/gazelle/modules_mapping/builder.py b/gazelle/modules_mapping/builder.py
deleted file mode 100644
index 3b471c0..0000000
--- a/gazelle/modules_mapping/builder.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import argparse
-import multiprocessing
-import subprocess
-import sys
-from datetime import datetime
-
-mutex = multiprocessing.Lock()
-
-
-def build(wheel):
-    print("{}: building {}".format(datetime.now(), wheel), file=sys.stderr)
-    process = subprocess.run(
-        [sys.executable, "-m", "build", "--wheel", "--no-isolation"], cwd=wheel
-    )
-    if process.returncode != 0:
-        # If the build without isolation fails, try to build it again with
-        # isolation. We need to protect this following logic in two ways:
-        #   1. Only build one at a time in this process.
-        #   2. Retry a few times to get around flakiness.
-        success = False
-        for _ in range(0, 3):
-            with mutex:
-                process = subprocess.run(
-                    [sys.executable, "-m", "build", "--wheel"],
-                    encoding="utf-8",
-                    cwd=wheel,
-                    capture_output=True,
-                )
-                if process.returncode != 0:
-                    continue
-                success = True
-                break
-        if not success:
-            print("STDOUT:", file=sys.stderr)
-            print(process.stdout, file=sys.stderr)
-            print("STDERR:", file=sys.stderr)
-            print(process.stderr, file=sys.stderr)
-            raise RuntimeError(
-                "{}: ERROR: failed to build {}".format(datetime.now(), wheel)
-            )
-
-
-def main(jobs, wheels):
-    with multiprocessing.Pool(jobs) as pool:
-        results = []
-        for wheel in wheels:
-            result = pool.apply_async(build, args=(wheel,))
-            results.append(result)
-        pool.close()
-        for result in results:
-            result.get()
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Builds Python wheels.")
-    parser.add_argument(
-        "wheels",
-        metavar="wheel",
-        type=str,
-        nargs="+",
-        help="A path to the extracted wheel directory.",
-    )
-    parser.add_argument(
-        "--jobs",
-        type=int,
-        default=8,
-        help="The number of concurrent build jobs to be executed.",
-    )
-    args = parser.parse_args()
-    exit(main(args.jobs, args.wheels))
diff --git a/gazelle/modules_mapping/def.bzl b/gazelle/modules_mapping/def.bzl
index e01ebd3..e90d454 100644
--- a/gazelle/modules_mapping/def.bzl
+++ b/gazelle/modules_mapping/def.bzl
@@ -7,324 +7,39 @@
 This mapping is necessary when reading Python import statements and determining
 if they are provided by third-party dependencies. Most importantly, when the
 module name doesn't match the wheel distribution name.
-
-Currently, this module only works with requirements.txt files locked using
-pip-tools (https://github.com/jazzband/pip-tools) with hashes. This is necessary
-in order to keep downloaded wheels in the Bazel cache. Also, the
-modules_mapping rule does not consider extras as specified by PEP 508.
 """
 
-# _modules_mapping_impl is the root entry for the modules_mapping rule
-# implementation.
-def _modules_mapping_impl(rctx):
-    requirements_data = rctx.read(rctx.attr.requirements)
-    python_interpreter = _get_python_interpreter(rctx)
-    pythonpath = "{}/__pythonpath".format(rctx.path(""))
-    res = rctx.execute(
-        [
-            python_interpreter,
-            "-m",
-            "pip",
-            "--verbose",
-            "--isolated",
-            "install",
-            "--target={}".format(pythonpath),
-            "--upgrade",
-            "--no-build-isolation",
-            "--no-cache-dir",
-            "--disable-pip-version-check",
-            "--index-url={}".format(rctx.attr.pip_index_url),
-            "build=={}".format(rctx.attr.build_wheel_version),
-            "setuptools=={}".format(rctx.attr.setuptools_wheel_version),
-        ],
-        quiet = rctx.attr.quiet,
-        timeout = rctx.attr.install_build_timeout,
+def _modules_mapping_impl(ctx):
+    modules_mapping = ctx.actions.declare_file(ctx.attr.modules_mapping_name)
+    args = ctx.actions.args()
+    args.add(modules_mapping.path)
+    args.add_all([whl.path for whl in ctx.files.wheels])
+    ctx.actions.run(
+        inputs = ctx.files.wheels,
+        outputs = [modules_mapping],
+        executable = ctx.executable._generator,
+        arguments = [args],
+        use_default_shell_env = False,
     )
-    if res.return_code != 0:
-        fail(res.stderr)
-    parsed_requirements = _parse_requirements_txt(requirements_data)
-    wheels = _get_wheels(rctx, python_interpreter, pythonpath, parsed_requirements)
-    res = rctx.execute(
-        [
-            python_interpreter,
-            rctx.path(rctx.attr._generator),
-        ] + wheels,
-        quiet = rctx.attr.quiet,
-        timeout = rctx.attr.generate_timeout,
-    )
-    if res.return_code != 0:
-        fail(res.stderr)
-    rctx.file("modules_mapping.json", content = res.stdout)
-    rctx.file("print.sh", content = "#!/usr/bin/env bash\ncat $1", executable = True)
-    rctx.file("BUILD", """\
-exports_files(["modules_mapping.json"])
+    return [DefaultInfo(files = depset([modules_mapping]))]
 
-sh_binary(
-    name = "print",
-    srcs = ["print.sh"],
-    data = [":modules_mapping.json"],
-    args = ["$(rootpath :modules_mapping.json)"],
-)
-""")
-
-# _get_python_interpreter determines whether the system or the user-provided
-# Python interpreter should be used and returns the path to be called.
-def _get_python_interpreter(rctx):
-    if rctx.attr.python_interpreter == None:
-        return "python"
-    return rctx.path(rctx.attr.python_interpreter)
-
-# _parse_requirements_txt parses the requirements.txt data into structs with the
-# information needed to download them using Bazel.
-def _parse_requirements_txt(data):
-    result = []
-    lines = data.split("\n")
-    current_requirement = ""
-    continue_previous_line = False
-    for line in lines:
-        # Ignore empty lines and comments.
-        if len(line) == 0 or line.startswith("#"):
-            continue
-
-        line = line.strip()
-
-        stripped_backslash = False
-        if line.endswith("\\"):
-            line = line[:-1]
-            stripped_backslash = True
-
-        # If this line is a continuation of the previous one, append the current
-        # line to the current requirement being processed, otherwise, start a
-        # new requirement.
-        if continue_previous_line:
-            current_requirement += line
-        else:
-            current_requirement = line
-
-        # Control whether the next line in the requirements.txt should be a
-        # continuation of the current requirement being processed or not.
-        continue_previous_line = stripped_backslash
-        if not continue_previous_line:
-            result.append(_parse_requirement(current_requirement))
-    return result
-
-# _parse_requirement parses a single requirement line.
-def _parse_requirement(requirement_line):
-    split = requirement_line.split("==")
-    requirement = {}
-
-    # Removing the extras (https://www.python.org/dev/peps/pep-0508/#extras)
-    # from the requirement name is fine since it's expected that the
-    # requirements.txt was compiled with pip-tools, which includes the extras as
-    # direct dependencies.
-    name = _remove_extras_from_name(split[0])
-    requirement["name"] = name
-    if len(split) == 1:
-        return struct(**requirement)
-    split = split[1].split(" ")
-    requirement["version"] = split[0]
-    if len(split) == 1:
-        return struct(**requirement)
-    args = split[1:]
-    hashes = []
-    for arg in args:
-        arg = arg.strip()
-
-        # Skip empty arguments.
-        if len(arg) == 0:
-            continue
-
-        # Halt processing if it hits a comment.
-        if arg.startswith("#"):
-            break
-        if arg.startswith("--hash="):
-            hashes.append(arg[len("--hash="):])
-    requirement["hashes"] = hashes
-    return struct(**requirement)
-
-# _remove_extras_from_name removes the [extras] from a requirement.
-# https://www.python.org/dev/peps/pep-0508/#extras
-def _remove_extras_from_name(name):
-    bracket_index = name.find("[")
-    if bracket_index == -1:
-        return name
-    return name[:bracket_index]
-
-# _get_wheels returns the wheel distributions for the given requirements. It
-# uses a few different strategies depending on whether compiled wheel
-# distributions exist on the remote index or not. The order in which it
-# operates:
-#
-#   1. Try to use the platform-independent compiled wheel (*-none-any.whl).
-#   2. Try to use the first match of the linux-dependent compiled wheel from the
-#      sorted releases list. This is valid as it's deterministic and the Python
-#      extension for Gazelle doesn't support other platform-specific wheels
-#      (one must use manual means to accomplish platform-specific dependency
-#      resolution).
-#   3. Use the published source for the wheel.
-def _get_wheels(rctx, python_interpreter, pythonpath, requirements):
-    wheels = []
-    to_build = []
-    for requirement in requirements:
-        if not hasattr(requirement, "hashes"):
-            if hasattr(requirement, "name") and requirement.name.startswith("#"):
-                # This is a comment in the requirements file.
-                continue
-            else:
-                fail("missing requirement hash for {}-{}: use pip-tools to produce a locked file".format(
-                    requirement.name,
-                    requirement.version,
-                ))
-
-        wheel = {}
-        wheel["name"] = requirement.name
-
-        requirement_info_url = "{index_base}/{name}/{version}/json".format(
-            index_base = rctx.attr.index_base,
-            name = requirement.name,
-            version = requirement.version,
-        )
-        requirement_info_path = "{}_info.json".format(requirement.name)
-
-        # TODO(f0rmiga): if the logs are too spammy, use rctx.execute with
-        # Python to perform the downloads since it's impossible to get the
-        # checksums of these JSON files and there's no option to mute Bazel
-        # here.
-        rctx.download(requirement_info_url, output = requirement_info_path)
-        requirement_info = json.decode(rctx.read(requirement_info_path))
-        if requirement.version in requirement_info["releases"]:
-            wheel["version"] = requirement.version
-        elif requirement.version.endswith(".0") and requirement.version[:-len(".0")] in requirement_info["releases"]:
-            wheel["version"] = requirement.version[:-len(".0")]
-        else:
-            fail("missing requirement version \"{}\" for wheel \"{}\" in fetched releases: available {}".format(
-                requirement.version,
-                requirement.name,
-                [version for version in requirement_info["releases"]],
-            ))
-        releases = sorted(requirement_info["releases"][wheel["version"]], key = _sort_release_by_url)
-        (wheel_url, sha256) = _search_url(releases, "-none-any.whl")
-
-        # TODO(f0rmiga): handle PEP 600.
-        # https://www.python.org/dev/peps/pep-0600/
-        if not wheel_url:
-            # Search for the Linux tag as defined in PEP 599.
-            (wheel_url, sha256) = _search_url(releases, "manylinux2014_x86_64")
-        if not wheel_url:
-            # Search for the Linux tag as defined in PEP 571.
-            (wheel_url, sha256) = _search_url(releases, "manylinux2010_x86_64")
-        if not wheel_url:
-            # Search for the Linux tag as defined in PEP 513.
-            (wheel_url, sha256) = _search_url(releases, "manylinux1_x86_64")
-        if not wheel_url:
-            # Search for the MacOS tag
-            (wheel_url, sha256) = _search_url(releases, "macosx_10_9_x86_64")
-
-        if wheel_url:
-            wheel_path = wheel_url.split("/")[-1]
-            rctx.download(wheel_url, output = wheel_path, sha256 = sha256)
-            wheel["path"] = wheel_path
-        else:
-            extension = ".tar.gz"
-            (src_url, sha256) = _search_url(releases, extension)
-            if not src_url:
-                extension = ".zip"
-                (src_url, sha256) = _search_url(releases, extension)
-            if not src_url:
-                fail("requirement URL for {}-{} not found".format(requirement.name, wheel["version"]))
-            rctx.download_and_extract(src_url, sha256 = sha256)
-            sanitized_name = requirement.name.lower().replace("-", "_")
-            requirement_path = src_url.split("/")[-1]
-            requirement_path = requirement_path[:-len(extension)]
-
-            # The resulting filename for the .whl file is not feasible to
-            # predict as it has too many variations, so we defer it to the
-            # Python globing to find the right file name since only one .whl
-            # file should be generated by the compilation.
-            wheel_path = "{}/**/*.whl".format(requirement_path)
-            wheel["path"] = wheel_path
-            to_build.append(requirement_path)
-
-        wheels.append(json.encode(wheel))
-
-    if len(to_build) > 0:
-        res = rctx.execute(
-            [python_interpreter, rctx.path(rctx.attr._builder)] + to_build,
-            quiet = rctx.attr.quiet,
-            environment = {
-                # To avoid use local "pip.conf"
-                "HOME": str(rctx.path("").realpath),
-                # Make uses of pip to use the requested index
-                "PIP_INDEX_URL": rctx.attr.pip_index_url,
-                "PYTHONPATH": pythonpath,
-            },
-        )
-        if res.return_code != 0:
-            fail(res.stderr)
-
-    return wheels
-
-# _sort_release_by_url is the custom function for the key property of the sorted
-# releases.
-def _sort_release_by_url(release):
-    return release["url"]
-
-# _search_url searches for a release in the list of releases that has a url
-# matching the provided extension.
-def _search_url(releases, extension):
-    for release in releases:
-        url = release["url"]
-        if url.find(extension) >= 0:
-            return (url, release["digests"]["sha256"])
-    return (None, None)
-
-modules_mapping = repository_rule(
+modules_mapping = rule(
     _modules_mapping_impl,
     attrs = {
-        "build_wheel_version": attr.string(
-            default = "0.5.1",
-            doc = "The build wheel version.",
+        "modules_mapping_name": attr.string(
+            default = "modules_mapping.json",
+            doc = "The name for the output JSON file.",
+            mandatory = False,
         ),
-        "generate_timeout": attr.int(
-            default = 30,
-            doc = "The timeout for the generator.py command.",
-        ),
-        "index_base": attr.string(
-            default = "https://pypi.org/pypi",
-            doc = "The base URL used for querying releases data as JSON.",
-        ),
-        "install_build_timeout": attr.int(
-            default = 30,
-            doc = "The timeout for the `pip install build` command.",
-        ),
-        "pip_index_url": attr.string(
-            default = "https://pypi.python.org/simple",
-            doc = "The index URL used for any pip install actions",
-        ),
-        "python_interpreter": attr.label(
-            allow_single_file = True,
-            doc = "If set, uses the custom-built Python interpreter, otherwise, uses the system one.",
-        ),
-        "quiet": attr.bool(
-            default = True,
-            doc = "Toggle this attribute to get verbose output from this rule.",
-        ),
-        "requirements": attr.label(
-            allow_single_file = True,
-            doc = "The requirements.txt file with hashes locked using pip-tools.",
+        "wheels": attr.label_list(
+            allow_files = True,
+            doc = "The list of wheels, usually the 'all_whl_requirements' from @<pip_repository>//:requirements.bzl",
             mandatory = True,
         ),
-        "setuptools_wheel_version": attr.string(
-            default = "v57.5.0",
-            doc = "The setuptools wheel version.",
-        ),
-        "_builder": attr.label(
-            allow_single_file = True,
-            default = "//gazelle/modules_mapping:builder.py",
-        ),
         "_generator": attr.label(
-            allow_single_file = True,
-            default = "//gazelle/modules_mapping:generator.py",
+            cfg = "host",
+            default = "//gazelle/modules_mapping:generator",
+            executable = True,
         ),
     },
     doc = "Creates a modules_mapping.json file for mapping module names to wheel distribution names.",
diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py
index 6ee654c..b93f968 100644
--- a/gazelle/modules_mapping/generator.py
+++ b/gazelle/modules_mapping/generator.py
@@ -1,4 +1,3 @@
-import glob
 import json
 import pathlib
 import sys
@@ -7,29 +6,19 @@
 
 # Generator is the modules_mapping.json file generator.
 class Generator:
-    stdout = None
     stderr = None
+    output_file = None
 
-    def __init__(self, stdout, stderr):
-        self.stdout = stdout
+    def __init__(self, stderr, output_file):
         self.stderr = stderr
+        self.output_file = output_file
 
     # dig_wheel analyses the wheel .whl file determining the modules it provides
     # by looking at the directory structure.
-    def dig_wheel(self, wheel):
+    def dig_wheel(self, whl):
         mapping = {}
-        wheel_paths = glob.glob(wheel["path"])
-        assert len(wheel_paths) != 0, "wheel not found for {}: searched for {}".format(
-            wheel["name"],
-            wheel["path"],
-        )
-        wheel_path = wheel_paths[0]
-        assert (
-            "UNKNOWN" not in wheel_path
-        ), "unknown-named wheel found for {}: possibly bad compilation".format(
-            wheel["name"],
-        )
-        with zipfile.ZipFile(wheel_path, "r") as zip_file:
+        wheel_name = get_wheel_name(whl)
+        with zipfile.ZipFile(whl, "r") as zip_file:
             for path in zip_file.namelist():
                 if is_metadata(path):
                     continue
@@ -40,32 +29,43 @@
                     # where this file is as an importable package.
                     if path.endswith("/__init__.py"):
                         module = path[: -len("/__init__.py")].replace("/", ".")
-                        mapping[module] = wheel["name"]
+                        mapping[module] = wheel_name
                     # Always index the module file.
                     if ext == ".so":
                         # Also remove extra metadata that is embeded as part of
                         # the file name as an extra extension.
                         ext = "".join(pathlib.Path(path).suffixes)
                     module = path[: -len(ext)].replace("/", ".")
-                    mapping[module] = wheel["name"]
+                    mapping[module] = wheel_name
         return mapping
 
     # run is the entrypoint for the generator.
     def run(self, wheels):
         mapping = {}
-        for wheel_json in wheels:
-            wheel = json.loads(wheel_json)
+        for whl in wheels:
             try:
-                mapping.update(self.dig_wheel(wheel))
+                mapping.update(self.dig_wheel(whl))
             except AssertionError as error:
                 print(error, file=self.stderr)
                 return 1
         mapping_json = json.dumps(mapping)
-        print(mapping_json, file=self.stdout)
-        self.stdout.flush()
+        with open(self.output_file, "w") as f:
+            f.write(mapping_json)
         return 0
 
 
+def get_wheel_name(path):
+    pp = pathlib.PurePath(path)
+    if pp.suffix != ".whl":
+        raise RuntimeError(
+            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
+                pp.name
+            )
+            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
+        )
+    return pp.name[: pp.name.find("-")]
+
+
 # is_metadata checks if the path is in a metadata directory.
 # Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
 def is_metadata(path):
@@ -74,6 +74,7 @@
 
 
 if __name__ == "__main__":
-    wheels = sys.argv[1:]
-    generator = Generator(sys.stdout, sys.stderr)
+    output_file = sys.argv[1]
+    wheels = sys.argv[2:]
+    generator = Generator(sys.stderr, output_file)
     exit(generator.run(wheels))