feat: expose 'pip_utils.normalize_name' function (#1542)

With this change users can use a previously private function to normalize
a PyPI package name into something that Bazel can use.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 57c4eff..dc3b079 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,8 +33,11 @@
   dependencies is now done as part of `py_repositories` call.
 
 * (pip_parse) The generated `requirements.bzl` file now has an additional symbol
-  `all_whl_requirements_by_package` which provides a map from the original package name
-  (as it appears in requirements.txt) to the target that provides the built wheel file.
+  `all_whl_requirements_by_package` which provides a map from the normalized
+  PyPI package name to the target that provides the built wheel file. Use the
+  `pip_utils.normalize_name` function from `@rules_python//python:pip.bzl` to
+  convert a PyPI package name to a key in the `all_whl_requirements_by_package`
+  map.
 
 * (pip_parse) The flag `incompatible_generate_aliases` has been flipped to
   `True` by default on `non-bzlmod` setups allowing users to use the same label
@@ -88,6 +91,9 @@
 * (pip) Support for using [PEP621](https://peps.python.org/pep-0621/) compliant
   `pyproject.toml` for creating a resolved `requirements.txt` file.
 
+* (utils) Added a `pip_utils` struct with a `normalize_name` function to allow users
+  to find out how `rules_python` would normalize a PyPI distribution name.
+
 ## [0.26.0] - 2023-10-06
 
 ### Changed
diff --git a/examples/pip_parse_vendored/BUILD.bazel b/examples/pip_parse_vendored/BUILD.bazel
index 8741c5a..ddf3281 100644
--- a/examples/pip_parse_vendored/BUILD.bazel
+++ b/examples/pip_parse_vendored/BUILD.bazel
@@ -19,7 +19,7 @@
     cmd = " | ".join([
         "cat $<",
         # Insert our load statement after the existing one so we don't produce a file with buildifier warnings
-        """sed -e '/^load.*.whl_library/i\\'$$'\\n''load("@python39//:defs.bzl", "interpreter")'""",
+        """sed -e '/^load.*.pip.bzl/i\\'$$'\\n''load("@python39//:defs.bzl", "interpreter")'""",
         # Replace the bazel 6.0.0 specific comment with something that bazel 5.4.0 would produce.
         # This enables this example to be run as a test under bazel 5.4.0.
         """sed -e 's#@//#//#'""",
diff --git a/examples/pip_parse_vendored/requirements.bzl b/examples/pip_parse_vendored/requirements.bzl
index 48371ed..21a2556 100644
--- a/examples/pip_parse_vendored/requirements.bzl
+++ b/examples/pip_parse_vendored/requirements.bzl
@@ -5,11 +5,12 @@
 """
 
 load("@python39//:defs.bzl", "interpreter")
+load("@rules_python//python:pip.bzl", "pip_utils")
 load("@rules_python//python/pip_install:pip_repository.bzl", "whl_library")
 
 all_requirements = ["@pip//certifi:pkg", "@pip//charset_normalizer:pkg", "@pip//idna:pkg", "@pip//requests:pkg", "@pip//urllib3:pkg"]
 
-all_whl_requirements_by_package = {"certifi": "@pip//certifi:whl", "charset-normalizer": "@pip//charset_normalizer:whl", "idna": "@pip//idna:whl", "requests": "@pip//requests:whl", "urllib3": "@pip//urllib3:whl"}
+all_whl_requirements_by_package = {"certifi": "@pip//certifi:whl", "charset_normalizer": "@pip//charset_normalizer:whl", "idna": "@pip//idna:whl", "requests": "@pip//requests:whl", "urllib3": "@pip//urllib3:whl"}
 
 all_whl_requirements = all_whl_requirements_by_package.values()
 
@@ -19,25 +20,22 @@
 _config = {"download_only": False, "enable_implicit_namespace_pkgs": False, "environment": {}, "extra_pip_args": [], "isolated": True, "pip_data_exclude": [], "python_interpreter": "python3", "python_interpreter_target": interpreter, "quiet": True, "repo": "pip", "repo_prefix": "pip_", "timeout": 600}
 _annotations = {}
 
-def _clean_name(name):
-    return name.replace("-", "_").replace(".", "_").lower()
-
 def requirement(name):
-    return "@pip//{}:{}".format(_clean_name(name), "pkg")
+    return "@pip//{}:{}".format(pip_utils.normalize_name(name), "pkg")
 
 def whl_requirement(name):
-    return "@pip//{}:{}".format(_clean_name(name), "whl")
+    return "@pip//{}:{}".format(pip_utils.normalize_name(name), "whl")
 
 def data_requirement(name):
-    return "@pip//{}:{}".format(_clean_name(name), "data")
+    return "@pip//{}:{}".format(pip_utils.normalize_name(name), "data")
 
 def dist_info_requirement(name):
-    return "@pip//{}:{}".format(_clean_name(name), "dist_info")
+    return "@pip//{}:{}".format(pip_utils.normalize_name(name), "dist_info")
 
 def entry_point(pkg, script = None):
     if not script:
         script = pkg
-    return "@pip_" + _clean_name(pkg) + "//:rules_python_wheel_entry_point_" + script
+    return "@pip_" + pip_utils.normalize_name(pkg) + "//:rules_python_wheel_entry_point_" + script
 
 def _get_annotation(requirement):
     # This expects to parse `setuptools==58.2.0     --hash=sha256:2551203ae6955b9876741a26ab3e767bb3242dafe86a32a749ea0d78b6792f11`
diff --git a/python/pip.bzl b/python/pip.bzl
index fd02a56..26e99fe 100644
--- a/python/pip.bzl
+++ b/python/pip.bzl
@@ -23,6 +23,7 @@
 load("//python/pip_install:requirements.bzl", _compile_pip_requirements = "compile_pip_requirements")
 load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
 load("//python/private:full_version.bzl", "full_version")
+load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:render_pkg_aliases.bzl", "NO_MATCH_ERROR_MESSAGE_TEMPLATE")
 
 compile_pip_requirements = _compile_pip_requirements
@@ -86,7 +87,7 @@
     requirements_bzl = """\
 # Generated by python/pip.bzl
 
-load("@{rules_python}//python:pip.bzl", "whl_library_alias")
+load("@{rules_python}//python:pip.bzl", "whl_library_alias", "pip_utils")
 {load_statements}
 
 _wheel_names = []
@@ -106,20 +107,17 @@
 
 {process_requirements_calls}
 
-def _clean_name(name):
-    return name.replace("-", "_").replace(".", "_").lower()
-
 def requirement(name):
-    return "{macro_tmpl}".format(_clean_name(name), "pkg")
+    return "{macro_tmpl}".format(pip_utils.normalize_name(name), "pkg")
 
 def whl_requirement(name):
-    return "{macro_tmpl}".format(_clean_name(name), "whl")
+    return "{macro_tmpl}".format(pip_utils.normalize_name(name), "whl")
 
 def data_requirement(name):
-    return "{macro_tmpl}".format(_clean_name(name), "data")
+    return "{macro_tmpl}".format(pip_utils.normalize_name(name), "data")
 
 def dist_info_requirement(name):
-    return "{macro_tmpl}".format(_clean_name(name), "dist_info")
+    return "{macro_tmpl}".format(pip_utils.normalize_name(name), "dist_info")
 
 def entry_point(pkg, script = None):
     fail("Not implemented yet")
@@ -278,3 +276,8 @@
         default_version = default_version,
         pip_parses = pip_parses,
     )
+
+# Extra utilities visible to rules_python users.
+pip_utils = struct(
+    normalize_name = normalize_name,
+)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index b841772..07ca3c2 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -276,9 +276,11 @@
 
     packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
 
-    bzl_packages = dict(sorted([[name, normalize_name(name)] for name, _ in parsed_requirements_txt.requirements]))
+    bzl_packages = sorted([normalize_name(name) for name, _ in parsed_requirements_txt.requirements])
 
     imports = [
+        # NOTE: Maintain the order consistent with `buildifier`
+        'load("@rules_python//python:pip.bzl", "pip_utils")',
         'load("@rules_python//python/pip_install:pip_repository.bzl", "whl_library")',
     ]
 
@@ -314,7 +316,7 @@
 
     if rctx.attr.incompatible_generate_aliases:
         macro_tmpl = "@%s//{}:{}" % rctx.attr.name
-        aliases = render_pkg_aliases(repo_name = rctx.attr.name, bzl_packages = bzl_packages.values())
+        aliases = render_pkg_aliases(repo_name = rctx.attr.name, bzl_packages = bzl_packages)
         for path, contents in aliases.items():
             rctx.file(path, contents)
     else:
@@ -324,20 +326,20 @@
     rctx.template("requirements.bzl", rctx.attr._template, substitutions = {
         "%%ALL_DATA_REQUIREMENTS%%": _format_repr_list([
             macro_tmpl.format(p, "data")
-            for p in bzl_packages.values()
+            for p in bzl_packages
         ]),
         "%%ALL_REQUIREMENTS%%": _format_repr_list([
             macro_tmpl.format(p, "pkg")
-            for p in bzl_packages.values()
+            for p in bzl_packages
         ]),
         "%%ALL_WHL_REQUIREMENTS_BY_PACKAGE%%": _format_dict(_repr_dict({
-            name: macro_tmpl.format(p, "whl")
-            for name, p in bzl_packages.items()
+            p: macro_tmpl.format(p, "whl")
+            for p in bzl_packages
         })),
         "%%ANNOTATIONS%%": _format_dict(_repr_dict(annotations)),
         "%%CONFIG%%": _format_dict(_repr_dict(config)),
         "%%EXTRA_PIP_ARGS%%": json.encode(options),
-        "%%IMPORTS%%": "\n".join(sorted(imports)),
+        "%%IMPORTS%%": "\n".join(imports),
         "%%MACRO_TMPL%%": macro_tmpl,
         "%%NAME%%": rctx.attr.name,
         "%%PACKAGES%%": _format_repr_list(
diff --git a/python/pip_install/pip_repository_requirements.bzl.tmpl b/python/pip_install/pip_repository_requirements.bzl.tmpl
index 23c8311..7a9e54c 100644
--- a/python/pip_install/pip_repository_requirements.bzl.tmpl
+++ b/python/pip_install/pip_repository_requirements.bzl.tmpl
@@ -18,25 +18,22 @@
 _config = %%CONFIG%%
 _annotations = %%ANNOTATIONS%%
 
-def _clean_name(name):
-    return name.replace("-", "_").replace(".", "_").lower()
-
 def requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "pkg")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "pkg")
 
 def whl_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "whl")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "whl")
 
 def data_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "data")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "data")
 
 def dist_info_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "dist_info")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "dist_info")
 
 def entry_point(pkg, script = None):
     if not script:
         script = pkg
-    return "@%%NAME%%_" + _clean_name(pkg) + "//:rules_python_wheel_entry_point_" + script
+    return "@%%NAME%%_" + pip_utils.normalize_name(pkg) + "//:rules_python_wheel_entry_point_" + script
 
 def _get_annotation(requirement):
     # This expects to parse `setuptools==58.2.0     --hash=sha256:2551203ae6955b9876741a26ab3e767bb3242dafe86a32a749ea0d78b6792f11`
diff --git a/python/private/bzlmod/pip_repository.bzl b/python/private/bzlmod/pip_repository.bzl
index e4e59b5..9e6b0f4 100644
--- a/python/private/bzlmod/pip_repository.bzl
+++ b/python/private/bzlmod/pip_repository.bzl
@@ -56,7 +56,7 @@
             for p in bzl_packages
         }),
         "%%MACRO_TMPL%%": macro_tmpl,
-        "%%NAME%%": rctx.attr.name,
+        "%%NAME%%": rctx.attr.repo_name,
     })
 
 pip_repository_attrs = {
diff --git a/python/private/bzlmod/requirements.bzl.tmpl b/python/private/bzlmod/requirements.bzl.tmpl
index 5ed1e49..b99322d 100644
--- a/python/private/bzlmod/requirements.bzl.tmpl
+++ b/python/private/bzlmod/requirements.bzl.tmpl
@@ -3,6 +3,8 @@
 @generated by rules_python pip.parse bzlmod extension.
 """
 
+load("@rules_python//python:pip.bzl", "pip_utils")
+
 all_requirements = %%ALL_REQUIREMENTS%%
 
 all_whl_requirements_by_package = %%ALL_WHL_REQUIREMENTS_BY_PACKAGE%%
@@ -11,26 +13,23 @@
 
 all_data_requirements = %%ALL_DATA_REQUIREMENTS%%
 
-def _clean_name(name):
-    return name.replace("-", "_").replace(".", "_").lower()
-
 def requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "pkg")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "pkg")
 
 def whl_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "whl")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "whl")
 
 def data_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "data")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "data")
 
 def dist_info_requirement(name):
-    return "%%MACRO_TMPL%%".format(_clean_name(name), "dist_info")
+    return "%%MACRO_TMPL%%".format(pip_utils.normalize_name(name), "dist_info")
 
 def entry_point(pkg, script = None):
     """entry_point returns the target of the canonical label of the package entrypoints.
     """
-    if not script:
-        script = pkg
+    actual_script = script or pkg
+
     fail("""Please replace this instance of entry_point with the following:
 
 ```
@@ -38,12 +37,10 @@
 
 py_console_script_binary(
     name = "{pkg}",
-    pkg = "@%%{pkg_label}",
-    script = "{script}",
+    pkg = "@%%NAME%%//{pkg}",{script}
 )
 ```
 """.format(
-        pkg = _clean_name(pkg),
-        pkg_label = "%%MACRO_TMPL%%".format(_clean_name(pkg), "pkg"),
-        script = script,
+        pkg = pip_utils.normalize_name(pkg),
+        script = "" if not script else "\n    script = \"%s\"," % actual_script,
     ))