lib/private/tar.bzl - third_party/github/aspect-build/bazel-lib - Git at Google

 "Implementation of tar rule"

 load("//lib:paths.bzl", "to_repository_relative_path")

 TAR_TOOLCHAIN_TYPE = "@aspect_bazel_lib//lib:tar_toolchain_type"

 # https://www.gnu.org/software/tar/manual/html_section/Compression.html
 _ACCEPTED_EXTENSIONS = [
     ".tar",  # uncompressed,
     ".gz",  # gzip
     ".tgz",  # gzip
     ".taz",  # gzip
     ".Z",  # compress
     ".taZ",  # compress
     ".bz2",  # bzip2
     ".tz2",  # bzip2
     ".tbz2",  # bzip2
     ".tbz",  # bzip2
     ".lz",  # lzip
     ".lzma",  # lzma
     ".tlz",  # lzma
     ".lzo",  # lzop
     ".xz",  # xz
     ".zst",  # zstd
     ".tzst",  # zstd
 ]

 _COMPRESSION_TO_EXTENSION = {
     "bzip2": ".tar.bz2",
     "compress": ".tar.Z",
     "gzip": ".tar.gz",
     "lrzip": ".tar.lrz",
     "lz4": ".tar.lz4",
     "lzma": ".tar.lzma",
     "lzop": ".tar.lzo",
     "xz": ".tar.xz",
     "zstd": ".tar.zst",
 }

 # https://www.gnu.org/software/tar/manual/html_section/Compression.html
 _ACCEPTED_COMPRESSION_TYPES = _COMPRESSION_TO_EXTENSION.keys()

 _tar_attrs = {
     "args": attr.string_list(
         doc = "Additional flags permitted by BSD tar; see the man page.",
     ),
     "srcs": attr.label_list(
         doc = """\
         Files, directories, or other targets whose default outputs are placed into the tar.

         If any of the srcs are binaries with runfiles, those are copied into the resulting tar as well.
         """,
         allow_files = True,
     ),
     "mode": attr.string(
         doc = """A mode indicator from the following list, copied from the tar manpage:

        - create: Create a new archive containing the specified items.
        - append: Like `create`, but new entries are appended to the archive.
             Note that this only works on uncompressed archives stored in regular files.
             The -f option is required.
        - list: List  archive contents to stdout.
        - update: Like `append`, but new entries are added only if they have a
             modification date newer than the corresponding entry in the archive.
 	       Note that this only works on uncompressed archives stored in
 	       regular files. The -f option	is required.
        - extract: Extract to disk from the archive. If a file with the same name
 	       appears more than once in the archive, each copy	 will  be  extracted,
            with  later  copies  overwriting  (replacing) earlier copies.
         """,
         values = ["create"],  # TODO: support other modes: ["append", "list", "update", "extract"]
         default = "create",
     ),
     "mtree": attr.label(
         doc = "An mtree specification file",
         allow_single_file = True,
         # Mandatory since it's the only way to set constant timestamps
         mandatory = True,
     ),
     "out": attr.output(
         doc = "Resulting tar file to write. If absent, `[name].tar` is written.",
     ),
     "compress": attr.string(
         doc = "Compress the archive file with a supported algorithm.",
         values = _ACCEPTED_COMPRESSION_TYPES,
     ),
 }

 _mtree_attrs = {
     "srcs": attr.label_list(doc = "Files that are placed into the tar", allow_files = True),
     "out": attr.output(doc = "Resulting specification file to write"),
 }

 def _add_compression_args(compress, args):
     if compress == "bzip2":
         args.add("--bzip2")
     if compress == "compress":
         args.add("--compress")
     if compress == "gzip":
         args.add("--gzip")
     if compress == "lrzip":
         args.add("--lrzip")
     if compress == "lzma":
         args.add("--lzma")
     if compress == "lz4":
         args.add("--lz4")
     if compress == "lzop":
         args.add("--lzop")
     if compress == "xz":
         args.add("--xz")
     if compress == "zstd":
         args.add("--zstd")

 def _calculate_runfiles_dir(default_info):
     manifest = default_info.files_to_run.runfiles_manifest

     # Newer versions of Bazel put the manifest besides the runfiles with the suffix .runfiles_manifest.
     # For example, the runfiles directory is named my_binary.runfiles then the manifest is beside the
     # runfiles directory and named my_binary.runfiles_manifest
     # Older versions of Bazel put the manifest file named MANIFEST in the runfiles directory
     # See similar logic:
     # https://github.com/aspect-build/rules_js/blob/c50bd3f797c501fb229cf9ab58e0e4fc11464a2f/js/private/bash.bzl#L63
     if manifest.short_path.endswith("_manifest") or manifest.short_path.endswith("/MANIFEST"):
         # Trim last 9 characters, as that's the length in both cases
         return manifest.short_path[:-9]
     fail("manifest path {} seems malformed".format(manifest.short_path))

 def _tar_impl(ctx):
     bsdtar = ctx.toolchains[TAR_TOOLCHAIN_TYPE]
     inputs = ctx.files.srcs[:]
     args = ctx.actions.args()

     # Set mode
     args.add(ctx.attr.mode, format = "--%s")

     # User-provided args first
     args.add_all(ctx.attr.args)

     # Compression args
     _add_compression_args(ctx.attr.compress, args)

     ext = _COMPRESSION_TO_EXTENSION[ctx.attr.compress] if ctx.attr.compress else ".tar"

     out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ext)
     args.add("--file", out)

     args.add(ctx.file.mtree, format = "@%s")
     inputs.append(ctx.file.mtree)

     ctx.actions.run(
         executable = bsdtar.tarinfo.binary,
         inputs = depset(direct = inputs, transitive = [bsdtar.default.files] + [
             src[DefaultInfo].default_runfiles.files
             for src in ctx.attr.srcs
         ]),
         outputs = [out],
         arguments = [args],
         mnemonic = "Tar",
     )

     return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))

 def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"):
     spec = [
         file,
         "uid=" + uid,
         "gid=" + gid,
         "time=" + time,
         "mode=" + mode,
         "type=" + type,
     ]
     if content:
         spec.append("content=" + content)
     return " ".join(spec)

 # This function exactly same as the one from "@aspect_bazel_lib//lib:paths.bzl"
 # except that it takes workspace_name directly instead of the ctx object.
 # Reason is the performance of Args.add_all closures where we use this function.
 # https://bazel.build/rules/lib/builtins/Args#add_all `allow_closure` explains this.
 def _to_rlocation_path(file, workspace):
     if file.short_path.startswith("../"):
         return file.short_path[3:]
     else:
         return workspace + "/" + file.short_path

 def _vis_encode(filename):
     # TODO(#794): correctly encode all filenames by using vis(3) (or porting it)
     return filename.replace(" ", "\\040")

 def _expand(file, expander, transform = to_repository_relative_path):
     expanded = expander.expand(file)
     lines = []
     for e in expanded:
         path = transform(e)
         segments = path.split("/")
         for i in range(1, len(segments)):
             parent = "/".join(segments[:i])

             # NOTE: The mtree format treats file paths without slashes as "relative" entries.
             #       If a relative entry is a directory, then it will "change directory" to that
             #       directory, and any subsequent "relative" entries will be created inside that
             #       directory. This causes issues when there is a top-level directory that is
             #       followed by a top-level file, as the file will be created inside the directory.
             #       To avoid this, we append a slash to the directory path to make it a "full" entry.
             if i == 1:
                 parent += "/"

             lines.append(_mtree_line(parent, "dir"))

         lines.append(_mtree_line(_vis_encode(path), "file", content = _vis_encode(e.path)))
     return lines

 def _mtree_impl(ctx):
     out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec")

     content = ctx.actions.args()
     content.set_param_file_format("multiline")
     content.add_all(
         ctx.files.srcs,
         map_each = _expand,
         expand_directories = True,
         uniquify = True,
     )

     for s in ctx.attr.srcs:
         default_info = s[DefaultInfo]
         if not default_info.files_to_run.runfiles_manifest:
             continue

         runfiles_dir = _calculate_runfiles_dir(default_info)

         # copy workspace name here just in case to prevent ctx
         # to be transferred to execution phase.
         workspace_name = str(ctx.workspace_name)

         content.add(_mtree_line(runfiles_dir, type = "dir"))
         content.add_all(
             s.default_runfiles.files,
             expand_directories = True,
             uniquify = True,
             format_each = "{}/%s".format(runfiles_dir),
             # be careful about what you pass to _expand_for_runfiles as it will carry the data structures over to execution phase.
             map_each = lambda f, e: _expand(f, e, lambda f: _to_rlocation_path(f, workspace_name)),
             allow_closure = True,
         )

     ctx.actions.write(out, content = content)

     return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))

 tar_lib = struct(
     attrs = _tar_attrs,
     implementation = _tar_impl,
     mtree_attrs = _mtree_attrs,
     mtree_implementation = _mtree_impl,
     toolchain_type = TAR_TOOLCHAIN_TYPE,
     common = struct(
         accepted_tar_extensions = _ACCEPTED_EXTENSIONS,
         accepted_compression_types = _ACCEPTED_COMPRESSION_TYPES,
         compression_to_extension = _COMPRESSION_TO_EXTENSION,
         add_compression_args = _add_compression_args,
     ),
 )

 tar = rule(
     doc = "Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly.",
     implementation = tar_lib.implementation,
     attrs = tar_lib.attrs,
     toolchains = [tar_lib.toolchain_type],
 )
	"Implementation of tar rule"

	load("//lib:paths.bzl", "to_repository_relative_path")

	TAR_TOOLCHAIN_TYPE = "@aspect_bazel_lib//lib:tar_toolchain_type"

	# https://www.gnu.org/software/tar/manual/html_section/Compression.html
	_ACCEPTED_EXTENSIONS = [
	".tar", # uncompressed,
	".gz", # gzip
	".tgz", # gzip
	".taz", # gzip
	".Z", # compress
	".taZ", # compress
	".bz2", # bzip2
	".tz2", # bzip2
	".tbz2", # bzip2
	".tbz", # bzip2
	".lz", # lzip
	".lzma", # lzma
	".tlz", # lzma
	".lzo", # lzop
	".xz", # xz
	".zst", # zstd
	".tzst", # zstd
	]

	_COMPRESSION_TO_EXTENSION = {
	"bzip2": ".tar.bz2",
	"compress": ".tar.Z",
	"gzip": ".tar.gz",
	"lrzip": ".tar.lrz",
	"lz4": ".tar.lz4",
	"lzma": ".tar.lzma",
	"lzop": ".tar.lzo",
	"xz": ".tar.xz",
	"zstd": ".tar.zst",
	}

	# https://www.gnu.org/software/tar/manual/html_section/Compression.html
	_ACCEPTED_COMPRESSION_TYPES = _COMPRESSION_TO_EXTENSION.keys()

	_tar_attrs = {
	"args": attr.string_list(
	doc = "Additional flags permitted by BSD tar; see the man page.",
	),
	"srcs": attr.label_list(
	doc = """\
	Files, directories, or other targets whose default outputs are placed into the tar.

	If any of the srcs are binaries with runfiles, those are copied into the resulting tar as well.
	""",
	allow_files = True,
	),
	"mode": attr.string(
	doc = """A mode indicator from the following list, copied from the tar manpage:

	- create: Create a new archive containing the specified items.
	- append: Like `create`, but new entries are appended to the archive.
	Note that this only works on uncompressed archives stored in regular files.
	The -f option is required.
	- list: List archive contents to stdout.
	- update: Like `append`, but new entries are added only if they have a
	modification date newer than the corresponding entry in the archive.
	Note that this only works on uncompressed archives stored in
	regular files. The -f option is required.
	- extract: Extract to disk from the archive. If a file with the same name
	appears more than once in the archive, each copy will be extracted,
	with later copies overwriting (replacing) earlier copies.
	""",
	values = ["create"], # TODO: support other modes: ["append", "list", "update", "extract"]
	default = "create",
	),
	"mtree": attr.label(
	doc = "An mtree specification file",
	allow_single_file = True,
	# Mandatory since it's the only way to set constant timestamps
	mandatory = True,
	),
	"out": attr.output(
	doc = "Resulting tar file to write. If absent, `[name].tar` is written.",
	),
	"compress": attr.string(
	doc = "Compress the archive file with a supported algorithm.",
	values = _ACCEPTED_COMPRESSION_TYPES,
	),
	}

	_mtree_attrs = {
	"srcs": attr.label_list(doc = "Files that are placed into the tar", allow_files = True),
	"out": attr.output(doc = "Resulting specification file to write"),
	}

	def _add_compression_args(compress, args):
	if compress == "bzip2":
	args.add("--bzip2")
	if compress == "compress":
	args.add("--compress")
	if compress == "gzip":
	args.add("--gzip")
	if compress == "lrzip":
	args.add("--lrzip")
	if compress == "lzma":
	args.add("--lzma")
	if compress == "lz4":
	args.add("--lz4")
	if compress == "lzop":
	args.add("--lzop")
	if compress == "xz":
	args.add("--xz")
	if compress == "zstd":
	args.add("--zstd")

	def _calculate_runfiles_dir(default_info):
	manifest = default_info.files_to_run.runfiles_manifest

	# Newer versions of Bazel put the manifest besides the runfiles with the suffix .runfiles_manifest.
	# For example, the runfiles directory is named my_binary.runfiles then the manifest is beside the
	# runfiles directory and named my_binary.runfiles_manifest
	# Older versions of Bazel put the manifest file named MANIFEST in the runfiles directory
	# See similar logic:
	# https://github.com/aspect-build/rules_js/blob/c50bd3f797c501fb229cf9ab58e0e4fc11464a2f/js/private/bash.bzl#L63
	if manifest.short_path.endswith("_manifest") or manifest.short_path.endswith("/MANIFEST"):
	# Trim last 9 characters, as that's the length in both cases
	return manifest.short_path[:-9]
	fail("manifest path {} seems malformed".format(manifest.short_path))

	def _tar_impl(ctx):
	bsdtar = ctx.toolchains[TAR_TOOLCHAIN_TYPE]
	inputs = ctx.files.srcs[:]
	args = ctx.actions.args()

	# Set mode
	args.add(ctx.attr.mode, format = "--%s")

	# User-provided args first
	args.add_all(ctx.attr.args)

	# Compression args
	_add_compression_args(ctx.attr.compress, args)

	ext = _COMPRESSION_TO_EXTENSION[ctx.attr.compress] if ctx.attr.compress else ".tar"

	out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ext)
	args.add("--file", out)

	args.add(ctx.file.mtree, format = "@%s")
	inputs.append(ctx.file.mtree)

	ctx.actions.run(
	executable = bsdtar.tarinfo.binary,
	inputs = depset(direct = inputs, transitive = [bsdtar.default.files] + [
	src[DefaultInfo].default_runfiles.files
	for src in ctx.attr.srcs
	]),
	outputs = [out],
	arguments = [args],
	mnemonic = "Tar",
	)

	return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))

	def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"):
	spec = [
	file,
	"uid=" + uid,
	"gid=" + gid,
	"time=" + time,
	"mode=" + mode,
	"type=" + type,
	]
	if content:
	spec.append("content=" + content)
	return " ".join(spec)

	# This function exactly same as the one from "@aspect_bazel_lib//lib:paths.bzl"
	# except that it takes workspace_name directly instead of the ctx object.
	# Reason is the performance of Args.add_all closures where we use this function.
	# https://bazel.build/rules/lib/builtins/Args#add_all `allow_closure` explains this.
	def _to_rlocation_path(file, workspace):
	if file.short_path.startswith("../"):
	return file.short_path[3:]
	else:
	return workspace + "/" + file.short_path

	def _vis_encode(filename):
	# TODO(#794): correctly encode all filenames by using vis(3) (or porting it)
	return filename.replace(" ", "\\040")

	def _expand(file, expander, transform = to_repository_relative_path):
	expanded = expander.expand(file)
	lines = []
	for e in expanded:
	path = transform(e)
	segments = path.split("/")
	for i in range(1, len(segments)):
	parent = "/".join(segments[:i])

	# NOTE: The mtree format treats file paths without slashes as "relative" entries.
	# If a relative entry is a directory, then it will "change directory" to that
	# directory, and any subsequent "relative" entries will be created inside that
	# directory. This causes issues when there is a top-level directory that is
	# followed by a top-level file, as the file will be created inside the directory.
	# To avoid this, we append a slash to the directory path to make it a "full" entry.
	if i == 1:
	parent += "/"

	lines.append(_mtree_line(parent, "dir"))

	lines.append(_mtree_line(_vis_encode(path), "file", content = _vis_encode(e.path)))
	return lines

	def _mtree_impl(ctx):
	out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec")

	content = ctx.actions.args()
	content.set_param_file_format("multiline")
	content.add_all(
	ctx.files.srcs,
	map_each = _expand,
	expand_directories = True,
	uniquify = True,
	)

	for s in ctx.attr.srcs:
	default_info = s[DefaultInfo]
	if not default_info.files_to_run.runfiles_manifest:
	continue

	runfiles_dir = _calculate_runfiles_dir(default_info)

	# copy workspace name here just in case to prevent ctx
	# to be transferred to execution phase.
	workspace_name = str(ctx.workspace_name)

	content.add(_mtree_line(runfiles_dir, type = "dir"))
	content.add_all(
	s.default_runfiles.files,
	expand_directories = True,
	uniquify = True,
	format_each = "{}/%s".format(runfiles_dir),
	# be careful about what you pass to _expand_for_runfiles as it will carry the data structures over to execution phase.
	map_each = lambda f, e: _expand(f, e, lambda f: _to_rlocation_path(f, workspace_name)),
	allow_closure = True,
	)

	ctx.actions.write(out, content = content)

	return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))

	tar_lib = struct(
	attrs = _tar_attrs,
	implementation = _tar_impl,
	mtree_attrs = _mtree_attrs,
	mtree_implementation = _mtree_impl,
	toolchain_type = TAR_TOOLCHAIN_TYPE,
	common = struct(
	accepted_tar_extensions = _ACCEPTED_EXTENSIONS,
	accepted_compression_types = _ACCEPTED_COMPRESSION_TYPES,
	compression_to_extension = _COMPRESSION_TO_EXTENSION,
	add_compression_args = _add_compression_args,
	),
	)

	tar = rule(
	doc = "Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly.",
	implementation = tar_lib.implementation,
	attrs = tar_lib.attrs,
	toolchains = [tar_lib.toolchain_type],
	)