refactor(internal): add a semver parsing utility function (#2218)

This `semver` function may turn out to be useful for validating
the input to the `python.*override` tag classes that will be added in
a follow-up PR. Because this change only refactors existing code and
adds tests, I decided to split it out into its own PR.

For a proof of concept, see #2151. This is work towards #2081.
diff --git a/examples/bzlmod/MODULE.bazel.lock b/examples/bzlmod/MODULE.bazel.lock
index af31a12..0cfe49d 100644
--- a/examples/bzlmod/MODULE.bazel.lock
+++ b/examples/bzlmod/MODULE.bazel.lock
@@ -1231,7 +1231,7 @@
     },
     "@@rules_python~//python/extensions:pip.bzl%pip": {
       "general": {
-        "bzlTransitiveDigest": "QxV2PiqVV2B5LpnSrlzLgYyKNbUEXyVc1u+ahMrefws=",
+        "bzlTransitiveDigest": "7vRndkQ5a5Q2gcPIP8Jd/AkNRuB4n7SofpNFmFvodG8=",
         "usagesDigest": "MChlcSw99EuW3K7OOoMcXQIdcJnEh6YmfyjJm+9mxIg=",
         "recordedFileInputs": {
           "@@other_module~//requirements_lock_3_11.txt": "a7d0061366569043d5efcf80e34a32c732679367cb3c831c4cdc606adc36d314",
@@ -6140,7 +6140,7 @@
     },
     "@@rules_python~//python/private/pypi:pip.bzl%pip_internal": {
       "general": {
-        "bzlTransitiveDigest": "P0W31OsSgVVNQ3oRHHFiRWK7NLBLyI+KbQQBCPhou7w=",
+        "bzlTransitiveDigest": "DQe4hZM+myEcJ/pVW54jl5vWJOw+oZNBZfE0WOX/S9g=",
         "usagesDigest": "Y8ihY+R57BAFhalrVLVdJFrpwlbsiKz9JPJ99ljF7HA=",
         "recordedFileInputs": {
           "@@rules_python~//tools/publish/requirements.txt": "031e35d03dde03ae6305fe4b3d1f58ad7bdad857379752deede0f93649991b8a",
diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel
index 3d23614..a35e2f7 100644
--- a/python/private/BUILD.bazel
+++ b/python/private/BUILD.bazel
@@ -296,6 +296,11 @@
 )
 
 bzl_library(
+    name = "semver_bzl",
+    srcs = ["semver.bzl"],
+)
+
+bzl_library(
     name = "sentinel_bzl",
     srcs = ["sentinel.bzl"],
 )
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel
index 21f69bf..1db50af 100644
--- a/python/private/pypi/BUILD.bazel
+++ b/python/private/pypi/BUILD.bazel
@@ -59,6 +59,7 @@
     srcs = ["extension.bzl"],
     deps = [
         ":attrs_bzl",
+        "//python/private:semver_bzl",
         ":hub_repository_bzl",
         ":parse_requirements_bzl",
         ":evaluate_markers_bzl",
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 1bc8f15..77a4778 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -19,6 +19,7 @@
 load("//python/private:auth.bzl", "AUTH_ATTRS")
 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:repo_utils.bzl", "repo_utils")
+load("//python/private:semver.bzl", "semver")
 load("//python/private:version_label.bzl", "version_label")
 load(":attrs.bzl", "use_isolated")
 load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS")
@@ -32,22 +33,8 @@
 load(":whl_library.bzl", "whl_library")
 load(":whl_repo_name.bzl", "whl_repo_name")
 
-def _parse_version(version):
-    major, _, version = version.partition(".")
-    minor, _, version = version.partition(".")
-    patch, _, version = version.partition(".")
-    build, _, version = version.partition(".")
-
-    return struct(
-        # use semver vocabulary here
-        major = major,
-        minor = minor,
-        patch = patch,  # this is called `micro` in the Python interpreter versioning scheme
-        build = build,
-    )
-
 def _major_minor_version(version):
-    version = _parse_version(version)
+    version = semver(version)
     return "{}.{}".format(version.major, version.minor)
 
 def _whl_mods_impl(mctx):
diff --git a/python/private/semver.bzl b/python/private/semver.bzl
new file mode 100644
index 0000000..9a240d4
--- /dev/null
+++ b/python/private/semver.bzl
@@ -0,0 +1,65 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"A semver version parser"
+
+def _key(version):
+    return (
+        version.major,
+        version.minor,
+        version.patch,
+        # non pre-release versions are higher
+        version.pre_release == "",
+        # then we compare each element of the pre_release tag separately
+        tuple([
+            (
+                i if not i.isdigit() else "",
+                # digit values take precedence
+                int(i) if i.isdigit() else 0,
+            )
+            for i in version.pre_release.split(".")
+        ]) if version.pre_release else None,
+        # And build info is just alphabetic
+        version.build,
+    )
+
+def semver(version):
+    """Parse the semver version and return the values as a struct.
+
+    Args:
+        version: {type}`str` the version string
+
+    Returns:
+        A {type}`struct` with `major`, `minor`, `patch`, `pre_release` and `build` attributes, plus `key()` and `str()` functions.
+    """
+
+    # Implement the https://semver.org/ spec
+    major, _, tail = version.partition(".")
+    minor, _, tail = tail.partition(".")
+    patch, _, build = tail.partition("+")
+    patch, _, pre_release = patch.partition("-")
+
+    public = struct(
+        major = int(major),
+        minor = int(minor or "0"),
+        # NOTE: this is called `micro` in the Python interpreter versioning scheme
+        patch = int(patch or "0"),
+        pre_release = pre_release,
+        build = build,
+        # buildifier: disable=uninitialized
+        key = lambda: _key(self.actual),
+        str = lambda: version,
+    )
+    self = struct(actual = public)
+    return public
diff --git a/tests/semver/BUILD.bazel b/tests/semver/BUILD.bazel
new file mode 100644
index 0000000..e12b1e5
--- /dev/null
+++ b/tests/semver/BUILD.bazel
@@ -0,0 +1,17 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load(":semver_test.bzl", "semver_test_suite")
+
+semver_test_suite(name = "semver_tests")
diff --git a/tests/semver/semver_test.bzl b/tests/semver/semver_test.bzl
new file mode 100644
index 0000000..6395639
--- /dev/null
+++ b/tests/semver/semver_test.bzl
@@ -0,0 +1,113 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""
+
+load("@rules_testing//lib:test_suite.bzl", "test_suite")
+load("//python/private:semver.bzl", "semver")  # buildifier: disable=bzl-visibility
+
+_tests = []
+
+def _test_semver_from_major(env):
+    actual = semver("3")
+    env.expect.that_int(actual.major).equals(3)
+    env.expect.that_int(actual.minor).equals(0)
+    env.expect.that_int(actual.patch).equals(0)
+    env.expect.that_str(actual.build).equals("")
+
+_tests.append(_test_semver_from_major)
+
+def _test_semver_from_major_minor_version(env):
+    actual = semver("4.9")
+    env.expect.that_int(actual.major).equals(4)
+    env.expect.that_int(actual.minor).equals(9)
+    env.expect.that_int(actual.patch).equals(0)
+    env.expect.that_str(actual.build).equals("")
+
+_tests.append(_test_semver_from_major_minor_version)
+
+def _test_semver_with_build_info(env):
+    actual = semver("1.2.3+mybuild")
+    env.expect.that_int(actual.major).equals(1)
+    env.expect.that_int(actual.minor).equals(2)
+    env.expect.that_int(actual.patch).equals(3)
+    env.expect.that_str(actual.build).equals("mybuild")
+
+_tests.append(_test_semver_with_build_info)
+
+def _test_semver_with_build_info_multiple_pluses(env):
+    actual = semver("1.2.3-rc0+build+info")
+    env.expect.that_int(actual.major).equals(1)
+    env.expect.that_int(actual.minor).equals(2)
+    env.expect.that_int(actual.patch).equals(3)
+    env.expect.that_str(actual.pre_release).equals("rc0")
+    env.expect.that_str(actual.build).equals("build+info")
+
+_tests.append(_test_semver_with_build_info_multiple_pluses)
+
+def _test_semver_alpha_beta(env):
+    actual = semver("1.2.3-alpha.beta")
+    env.expect.that_int(actual.major).equals(1)
+    env.expect.that_int(actual.minor).equals(2)
+    env.expect.that_int(actual.patch).equals(3)
+    env.expect.that_str(actual.pre_release).equals("alpha.beta")
+
+_tests.append(_test_semver_alpha_beta)
+
+def _test_semver_sort(env):
+    want = [
+        semver(item)
+        for item in [
+            # The items are sorted from lowest to highest version
+            "0.0.1",
+            "0.1.0-rc",
+            "0.1.0",
+            "0.9.11",
+            "0.9.12",
+            "1.0.0-alpha",
+            "1.0.0-alpha.1",
+            "1.0.0-alpha.beta",
+            "1.0.0-beta",
+            "1.0.0-beta.2",
+            "1.0.0-beta.11",
+            "1.0.0-rc.1",
+            "1.0.0-rc.2",
+            "1.0.0",
+            # Also handle missing minor and patch version strings
+            "2.0",
+            "3",
+            # Alphabetic comparison for different builds
+            "3.0.0+build0",
+            "3.0.0+build1",
+        ]
+    ]
+    actual = sorted(want, key = lambda x: x.key())
+    env.expect.that_collection(actual).contains_exactly(want).in_order()
+    for i, greater in enumerate(want[1:]):
+        smaller = actual[i]
+        if greater.key() <= smaller.key():
+            env.fail("Expected '{}' to be smaller than '{}', but got otherwise".format(
+                smaller.str(),
+                greater.str(),
+            ))
+
+_tests.append(_test_semver_sort)
+
+def semver_test_suite(name):
+    """Create the test suite.
+
+    Args:
+        name: the name of the test suite
+    """
+    test_suite(name = name, basic_tests = _tests)