Add pip_data_exclude to pip_repository (#43)

diff --git a/defs.bzl b/defs.bzl
index b0a3c2a..d46004b 100644
--- a/defs.bzl
+++ b/defs.bzl
@@ -40,6 +40,12 @@
             "\"" + " ".join(rctx.attr.extra_pip_args) + "\"",
         ]
 
+    if rctx.attr.pip_data_exclude:
+        args += [
+            "--pip_data_exclude",
+            struct(exclude = rctx.attr.pip_data_exclude).to_json(),
+        ]
+
     result = rctx.execute(
         args,
         environment = {
@@ -71,6 +77,9 @@
         "extra_pip_args": attr.string_list(
             doc = "Extra arguments to pass on to pip. Must not contain spaces.",
         ),
+        "pip_data_exclude": attr.string_list(
+            doc = "Additional data exclusion parameters to add to the pip packages BUILD file.",
+        ),
     },
     implementation = _pip_repository_impl,
 )
diff --git a/extract_wheels/__init__.py b/extract_wheels/__init__.py
index bd161a5..c42965d 100644
--- a/extract_wheels/__init__.py
+++ b/extract_wheels/__init__.py
@@ -10,6 +10,7 @@
 import os
 import subprocess
 import sys
+import json
 
 from extract_wheels.lib import bazel, requirements
 
@@ -65,6 +66,11 @@
     )
     parser.add_argument('--extra_pip_args', action='store',
                         help=('Extra arguments to pass down to pip.'))
+    parser.add_argument(
+        "--pip_data_exclude",
+        action='store',
+        help='Additional data exclusion parameters to add to the pip packages BUILD file.'
+    )
     args = parser.parse_args()
 
     pip_args = [sys.executable, "-m", "pip", "wheel", "-r", args.requirements]
@@ -75,8 +81,13 @@
 
     extras = requirements.parse_extras(args.requirements)
 
+    if args.pip_data_exclude:
+        pip_data_exclude = json.loads(args.pip_data_exclude)["exclude"]
+    else:
+        pip_data_exclude = []
+
     targets = [
-        '"%s%s"' % (args.repo, bazel.extract_wheel(whl, extras))
+        '"%s%s"' % (args.repo, bazel.extract_wheel(whl, extras, pip_data_exclude))
         for whl in glob.glob("*.whl")
     ]
 
diff --git a/extract_wheels/lib/bazel.py b/extract_wheels/lib/bazel.py
index 1b68d91..cc7d879 100644
--- a/extract_wheels/lib/bazel.py
+++ b/extract_wheels/lib/bazel.py
@@ -1,12 +1,13 @@
 """Utility functions to manipulate Bazel files"""
 import os
 import textwrap
+import json
 from typing import Iterable, List, Dict, Set
 
 from extract_wheels.lib import namespace_pkgs, wheel, purelib
 
 
-def generate_build_file_contents(name: str, dependencies: List[str]) -> str:
+def generate_build_file_contents(name: str, dependencies: List[str], pip_data_exclude: List[str]) -> str:
     """Generate a BUILD file for an unzipped Wheel
 
     Args:
@@ -20,6 +21,8 @@
     there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`).
     """
 
+    data_exclude = ["**/*.py", "**/* *", "BUILD", "WORKSPACE"] + pip_data_exclude
+
     return textwrap.dedent(
         """\
         package(default_visibility = ["//visibility:public"])
@@ -29,14 +32,14 @@
         py_library(
             name = "{name}",
             srcs = glob(["**/*.py"], allow_empty = True),
-            data = glob(["**/*"], exclude=["**/*.py", "**/* *", "BUILD", "WORKSPACE"]),
+            data = glob(["**/*"], exclude={data_exclude}),
             # This makes this directory a top-level in the python import
             # search path for anything that depends on this.
             imports = ["."],
             deps = [{dependencies}],
         )
         """.format(
-            name=name, dependencies=",".join(dependencies)
+            name=name, dependencies=",".join(dependencies), data_exclude=json.dumps(data_exclude)
         )
     )
 
@@ -116,7 +119,7 @@
         namespace_pkgs.add_pkgutil_style_namespace_pkg_init(ns_pkg_dir)
 
 
-def extract_wheel(wheel_file: str, extras: Dict[str, Set[str]]) -> str:
+def extract_wheel(wheel_file: str, extras: Dict[str, Set[str]], pip_data_exclude: List[str]) -> str:
     """Extracts wheel into given directory and creates a py_library target.
 
     Args:
@@ -145,7 +148,7 @@
 
     with open(os.path.join(directory, "BUILD"), "w") as build_file:
         contents = generate_build_file_contents(
-            sanitise_name(whl.name), sanitised_dependencies,
+            sanitise_name(whl.name), sanitised_dependencies, pip_data_exclude,
         )
         build_file.write(contents)