bazel 工具函数

最新推荐文章于 2025-03-05 14:48:50 发布

蓝鲸123

最新推荐文章于 2025-03-05 14:48:50 发布

阅读量4.8k

点赞数 5

分类专栏： bazel 文章标签： bazel

本文链接：https://blog.csdn.net/TH_NUM/article/details/107008922

版权

bazel 专栏收录该内容

2 篇文章

订阅专栏

文章目录

Bazel 官方文档

不管是写WORKSPACE，BUILD、.bzl(主要是一些函数)或者其他文件，都要遵循一些Bazel的规则，有些规则是bazel内置的函数使用说明，有的规则是一些语法分析用到的。

例如：

# Builds the MKL-DNN sources into a C++ library that any package may depend on.
cc_library(
    name = "mkl_dnn",
    # Sources and private headers collected from the common and CPU trees.
    srcs = glob([
        "src/common/*.cpp",
        "src/common/*.hpp",
        "src/cpu/*.cpp",
        "src/cpu/*.hpp",
        "src/cpu/**/*.cpp",
        "src/cpu/**/*.hpp",
        "src/cpu/xbyak/*.h",
    ]),
    # Public headers: everything directly under include/.
    hdrs = glob(["include/*"]),
    copts = [
        "-fexceptions",
        "-DUSE_MKL",
        "-DUSE_CBLAS",
    ],
    includes = [
        "include",
        "src",
        "src/common",
        "src/cpu",
        "src/cpu/gemm",
        "src/cpu/xbyak",
    ],
    # NOTE(review): nocopts is reportedly unavailable in newer Bazel releases;
    # confirm against the Bazel version in use.
    nocopts = "-fno-exceptions",
    visibility = ["//visibility:public"],
    # The MKL headers and libraries are only added on linux_x86_64; every
    # other configuration gets no extra dependencies.
    deps = select({
        "@org_tensorflow//tensorflow:linux_x86_64": [
            "@mkl_linux//:mkl_headers",
            "@mkl_linux//:mkl_libs_linux",
        ],
        "//conditions:default": [],
    }),
)

bazel info output_base # 列出 bazel 的输出根目录（output base directory）

visibility

visibility = ["//visibility:public"],
有5种形式的label
(1)["//visibility:public"]: Anyone can use this rule

(2) ["//visibility:private"]: Only rules in this package can use this rule.

(3) ["//some/package:__pkg__", "//other/package:__pkg__"]: Only rules in some/package and other/package (defined in some/package/BUILD and other/package/BUILD) have access to this rule. 只有指定的 package 的 BUILD 文件中定义的 rule 才有权限使用

(4)["//project:__subpackages__", "//other:__subpackages__"]: Only rules in packages project or other or in one of their sub-packages have access to this rule. 指定的package下面的包括子目录都可以使用

(5) ["//some/package:my_package_group"]: A package group is a named set of package names.

可配置的属性 config_setting

# Conditions keyed on the --cpu flag; the select() below refers to them.
config_setting(
    name = "x86_mode",
    values = {
        "cpu": "x86",
    },
)

config_setting(
    name = "arm_mode",
    values = {
        "cpu": "arm",
    },
)

# Library whose single source file is chosen by whichever condition matches.
cc_library(
    name = "multiplatform_lib",
    srcs = select({
        ":x86_mode": ["x86_impl.cc"],
        ":arm_mode": ["arm_impl.cc"],
    }),
)

在属性值的设置中使用select()可以根据输入的config_setting的值来进行匹配。比如

bazel build :multiplatform_lib --cpu=arm

会把 multiplatform_lib 的 srcs 设置为 ["arm_impl.cc"]；而 bazel build :multiplatform_lib --cpu=x86 会把 srcs 设置为 ["x86_impl.cc"]。

glob

glob(include, exclude=[], exclude_directories=1)

Glob是一个辅助函数，用于在任何位置获得想要的文件列表。可以使用*通配符以及目录分隔符/，另外**表示递归通配符，只能作为完整的路径段出现在目录分隔符/之间，比如 "x/**/*.java" 是合法的，而 "test**/testdata.xml" 和 "**.java" 都是非法的。除此之外不支持其他通配符。

select

select(
    {conditionA: valuesA, conditionB: valuesB, ...},
    no_match_error = "custom message"
)

这也是一个辅助函数，可以使得rule的属性被配置，选取的方式通过读Bazel的命令行flag。
里面的conditionA是在config_setting中定义出来的。比如：

# Matches builds whose compilation_mode flag is "opt".
config_setting(
    name = "simple",
    values = {
        "compilation_mode": "opt",
    },
)

那么在命令行中如果有 --compilation_mode=opt那么就是使用了这个simple

workspace

# Declares the global name of this repository.
workspace(name = "com_example_project")

在WORKSPACE文件中使用，每个仓库的WORKSPACE文件都应该有这个函数，来设置这个仓库的全局名称。这个名称被用来存放输出目录。

repository_rule

repository_rule 在 .bzl 文件中定义，定义出来的规则只能在 WORKSPACE 文件中加载并调用；它允许在 Bazel 的加载阶段执行非密封（non-hermetic）的操作。
例如：
WORKSPACE 文件

# Names this repository, then loads and calls the macro that declares its
# external dependencies.
workspace(name = "org_tensorflow")
# Load tf_repositories() before loading dependencies for other repository so
# that dependencies like com_google_protobuf won't be overridden.
load("//tensorflow:workspace.bzl", "tf_repositories")
# Please add all new TensorFlow dependencies in workspace.bzl.
tf_repositories()

//tensorflow:workspace.bzl 文件：

load("//third_party/mkl:build_defs.bzl", "mkl_repository")
load("//third_party:repo.bzl", "tf_http_archive")

# NOTE(review): clean_dep() is called below but is neither defined nor loaded
# in this excerpt; it must be in scope in the real file — confirm.
def tf_repositories(path_prefix = "", tf_repo_name = ""):
    """All external dependencies for TF builds.

    Declares the MKL ML repositories for Linux, Windows and macOS plus the two
    MKL-DNN source archives.

    Args:
      path_prefix: not used by the declarations shown here.
      tf_repo_name: not used by the declarations shown here.
    """
    mkl_repository(
        name = "mkl_linux",
        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
        sha256 = "a936d6b277a33d2a027a024ea8e65df62bd2e162c7ca52c48486ed9d5dc27160",
        strip_prefix = "mklml_lnx_2019.0.5.20190502",
        urls = [
            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_lnx_2019.0.5.20190502.tgz",
            "https://github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_lnx_2019.0.5.20190502.tgz",
        ],
    )
    mkl_repository(
        name = "mkl_windows",
        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
        sha256 = "535857b17643d7f7546b58fc621244e7cfcc4fff2aa2ebd3fc5b4e126bfc36cf",
        strip_prefix = "mklml_win_2019.0.5.20190502",
        urls = [
            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_win_2019.0.5.20190502.zip",
            "https://github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_win_2019.0.5.20190502.zip",
        ],
    )
    mkl_repository(
        name = "mkl_darwin",
        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
        sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68",
        strip_prefix = "mklml_mac_2019.0.5.20190502",
        urls = [
            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_mac_2019.0.5.20190502.tgz",
            "https://github.com/intel/mkl-dnn/releases/download/v0.20-rc/mklml_mac_2019.0.5.20190502.tgz",
        ],
    )

    # Important: If you are upgrading MKL-DNN, then update the version numbers
    # in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of
    # MKL-DNN might require upgrading MKL ML libraries also. If they need to be
    # upgraded then update the version numbers on all three versions above
    # (Linux, Mac, Windows).
    tf_http_archive(
        name = "mkl_dnn",
        build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
        sha256 = "a198a9bd3c584607e6a467f780beca92c8411cd656fcc8ec6fa5abe73d4af823",
        strip_prefix = "mkl-dnn-0.20.3",
        urls = [
            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/archive/v0.20.3.tar.gz",
            "https://github.com/intel/mkl-dnn/archive/v0.20.3.tar.gz",
        ],
    )

    tf_http_archive(
        name = "mkl_dnn_v1",
        build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
        sha256 = "fcc2d951f7170eade0cfdd0d8d1d58e3e7785bd326bca6555f3722f8cba71811",
        strip_prefix = "mkl-dnn-1.0-pc2",
        urls = [
            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/archive/v1.0-pc2.tar.gz",
            "https://github.com/intel/mkl-dnn/archive/v1.0-pc2.tar.gz",
        ],
    )

mkl_repository 在文件//third_party/mkl:build_defs.bzl 中定义：

_TF_MKL_ROOT = "TF_MKL_ROOT"

def _enable_local_mkl(repository_ctx):
    """Reports whether the _TF_MKL_ROOT variable is present in the environment."""
    env = repository_ctx.os.environ
    return _TF_MKL_ROOT in env
    
def _mkl_autoconf_impl(repository_ctx):
    """Implementation of the local_mkl_autoconf repository rule.

    Fills the repository either with symlinks into a local MKL install or with
    the contents of the downloaded archive, then links the BUILD file.

    Args:
      repository_ctx: the repository context passed in by Bazel.
    """
    if not _enable_local_mkl(repository_ctx):
        # No local install was requested: fetch and unpack the remote archive.
        repository_ctx.download_and_extract(
            repository_ctx.attr.urls,
            sha256 = repository_ctx.attr.sha256,
            stripPrefix = repository_ctx.attr.strip_prefix,
        )
    else:
        # Link lib, include and license.txt from the local MKL root.
        local_root = repository_ctx.os.environ[_TF_MKL_ROOT]
        for entry in ["lib", "include", "license.txt"]:
            repository_ctx.symlink("%s/%s" % (local_root, entry), entry)

    # The BUILD file is linked in both cases.
    repository_ctx.symlink(repository_ctx.attr.build_file, "BUILD")
    
# Repository rule backed by _mkl_autoconf_impl. Each entry in attrs is read in
# the implementation through repository_ctx.attr.<name>.
mkl_repository = repository_rule(
    implementation = _mkl_autoconf_impl,
    # Environment variables this rule declares a dependency on.
    environ = [
        _TF_MKL_ROOT,
    ],
    attrs = {
        # Label of the file that gets symlinked into the repository as BUILD.
        "build_file": attr.label(),
        # Archive download locations, checksum and prefix to strip; only used
        # when the archive is downloaded.
        "urls": attr.string_list(default = []),
        "sha256": attr.string(default = ""),
        "strip_prefix": attr.string(default = ""),
    },
)

可以看出在这里定义了repository_rule， attrs 里面定义了build_file （这里这个文件主要是 mkl.BUILD 文件，在mkl.BUILD文件内定义了相关的操作library）。定义attrs ，就可以在implementation 的函数内部使用repository_ctx.attr.<attribute_name> 进行获取。而 repository_rule 的name，可以通过repository_ctx.name 得到。

Implementation 函数：
只有一个输入参数 repository_ctx。函数返回 None 表示在给定这些参数的情况下该规则是可重现的（reproducible）；或者返回一个包含该规则一组参数的字典，用这组参数可以把该规则变成可重现的规则，从而生成相同的存储库。例如，对于跟踪 git 存储库的规则，这意味着返回一个确定的提交标识符，而不是最初指定的浮动分支。

输入参数repository_ctx可用于访问属性值和非密封功能（查找二进制文件，执行二进制文件，在存储库中创建文件或从Internet下载文件）。更多的函数参考链接

例如：

licenses(["notice"])  # 3-Clause BSD

# Lets other packages refer to license.txt directly.
exports_files(["license.txt"])
# Publishes the license file under the :LICENSE label.
filegroup(
    name = "LICENSE",
    srcs = [
        "license.txt",
    ],
    visibility = ["//visibility:public"],
)
# Public target that puts the include/ directory on the include path.
cc_library(
    name = "mkl_headers",
    # NOTE(review): glob() only documents the `*` and `**` wildcards, so the
    # "(a|b|...)" alternation below is presumably matched literally. Confirm
    # that this pattern selects the intended files.
    srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]),
    includes = ["include"],
    visibility = ["//visibility:public"],
)

在其他的BUILD文件，就可以通过 @mkl_linux//:mkl_headers进行访问，例如：

# Source-less library that only forwards the MKL targets of the matching
# platform to whoever depends on it.
cc_library(
    name = "intel_binary_blob",
    visibility = ["//visibility:public"],
    # Each branch pulls headers and libs from the external repository for that
    # platform (@mkl_linux, @mkl_darwin, @mkl_windows).
    deps = select({
        "@org_tensorflow//tensorflow:linux_x86_64": [
            "@mkl_linux//:mkl_headers",
            "@mkl_linux//:mkl_libs_linux",
        ],
        "@org_tensorflow//tensorflow:macos": [
            "@mkl_darwin//:mkl_headers",
            "@mkl_darwin//:mkl_libs_darwin",
        ],
        "@org_tensorflow//tensorflow:windows": [
            "@mkl_windows//:mkl_headers",
            "@mkl_windows//:mkl_libs_windows",
        ],
        # No MKL dependencies on any other configuration.
        "//conditions:default": [],
    }),
)

Python py_binary & py_library

py_binary

py_binary(name, deps, srcs, data, args, compatible_with, default_python_version, deprecation, distribs, features, imports, legacy_create_init, licenses, main, output_licenses, restricted_to, srcs_version, stamp, tags, testonly, toolchains, visibility)

这个rule是一个可执行的Python程序，包含了一些Python源码，一个*.runfiles的目录树内含了所有运行时所需的代码和数据，以及一个配置了初始化环境和数据的启动脚本。
在py_binary中可以依赖别的py_library。

# Executable Python program built from foo.py, with a runtime data dependency
# and two library dependencies.
py_binary(
    name = "foo",
    srcs = ["foo.py"],
    data = [":transform"],  # a cc_binary which we invoke at run time
    deps = [
        "//pyglib",
        ":foolib",  # a py_library
    ],
)

在别的非Python的library 中如果想放入py_binary那么可以放入data属性中：

# The Python program that the non-Python target needs at run time.
py_binary(
    name = "test_main",
    srcs = ["test_main.py"],
)

# A non-Python library that uses the py_binary: it lists the binary in its
# data attribute so the program is available at run time.
cc_library(
    name = "testing",
    srcs = glob(["*.cc"]),
    data = [":test_main"],
)

py_library

py_library(name, deps, srcs, data, compatible_with, deprecation, distribs, features, imports, licenses, restricted_to, srcs_version, tags, testonly, visibility)

参数介绍

name：唯一名字
deps：List of labels; optional依赖，可以是源码但是为了方便阅读还是放在srcs里好
srcs：List of labels; optional包含生成目标的源码
data：List of strings; optional运行时所需的文件
imports：List of strings; optional要加载的路径，加到PYTHONPATH中
srcs_version：String; optional; default is "PY2AND3"只是为了文档的目的，不会影响python解释器的版本。

filegroup

用于给一组目标起一个统一的名字，方便在其它的rule中引用。

filegroup(name, srcs, data, compatible_with, deprecation, distribs, features, licenses, output_group, output_licenses, path, restricted_to, tags, testonly, visibility)

# Groups two explicitly listed files under the label :mygroup.
filegroup(
    name = "mygroup",
    srcs = [
        "a_file.txt",
        "some/subdirectory/another_file.txt",
    ],
)

# Groups files matched by glob(): .dat files directly under testdata/ and
# .log files at any depth under testdata/logs/.
filegroup(
    name = "exported_testdata",
    srcs = glob([
        "testdata/*.dat",
        "testdata/logs/**/*.log",
    ]),
)

# A consumer refers to both groups by label in its data attribute.
cc_library(
    name = "my_library",
    srcs = ["foo.cc"],
    data = [
        "//my_package:exported_testdata",
        "//my_package:mygroup",
    ],
)

rule

rule 定义一个规则。输入文件，通过action操作，产生输出文件。
例子：

# Rule implementation that expands the template file into the versionscript
# output, appending the module's init symbols to the exported list. The
# replaced marker depends on whether the target is an ld version script.
def _append_init_to_versionscript_impl(ctx):
    module = ctx.attr.module_name

    # Pick the marker to replace and its expansion; everything else about the
    # expand_template call is shared by both formats.
    if ctx.attr.is_version_script:
        replacements = {
            "global:": "global:\n     init_%s;\n     _init_%s;\n     PyInit_*;\n     _PyInit_*;" % (module, module),
        }
    else:
        replacements = {
            "*tensorflow*": "*tensorflow*\ninit_%s\n_init_%s\nPyInit_*\n_PyInit_*\n" % (module, module),
        }

    ctx.actions.expand_template(
        template = ctx.file.template_file,
        output = ctx.outputs.versionscript,
        substitutions = replacements,
        is_executable = False,
    )
        
# Rule wrapping _append_init_to_versionscript_impl. The implementation reads
# the attributes through ctx.attr / ctx.file and writes ctx.outputs.versionscript.
_append_init_to_versionscript = rule(
    attrs = {
        # Module name substituted into the init_%s / _init_%s symbols.
        "module_name": attr.string(mandatory = True),
        # The single template file that gets expanded.
        "template_file": attr.label(
            allow_single_file = True,
            mandatory = True,
        ),
        # Selects which marker in the template is replaced.
        "is_version_script": attr.bool(
            default = True,
            doc = "whether target is a ld version script or exported symbol list",
            mandatory = False,
        ),
    },
    # Output declared from the "%{name}.lds" template and exposed to the
    # implementation as ctx.outputs.versionscript.
    outputs = {"versionscript": "%{name}.lds"},
    implementation = _append_init_to_versionscript_impl,
)

genrule

这个rule的功能是使用用户自定义的bash命令，产生一个或者多个文件。

genrule(name, srcs, outs, cmd, compatible_with, deprecation, distribs, executable, features, licenses, local, message, output_licenses, output_to_bindir, restricted_to, tags, testonly, tools, visibility)

参数介绍：

参数介绍：
name：唯一名字
srcs：List of labels; optional源输入文件
outs：List of filenames; required; nonconfigurable输出文件列表，文件处于package内可以被label索引到。
cmd：String; required运行的cmd
    1、可以使用$(location)。
    2、outs中的文件名不包括在替换中，在cmd中需要以$(@D), $@, $(OUTS) or $(location output_name)方式出现。
    3、有了host配置后，环境变量共享库是可在cmd中用的。比如$(JAVA), $(JAVAC) and $(JAVABASE)。
    4、如果cmd执行后的返回值不是0那么就认为失败了。
executable：Boolean; optional; nonconfigurable; default is 0指示输出文件是否是可执行的。
tools：List of labels; optional表示rule依赖的tool，可以是已有的文件，也可以是一个target或者生成的文件。

使用建议：

1、输出应该是确定和封闭的，每次运行都不会发生变化。
2、广泛使用$(location)来获取文件位置，例如 $(location :name)
3、写common Skylark宏代码
4、确保退出码可以正确指示genrule的成功或失败
5、不要写print，一个成功的genrule应该是无打印的
6、$$表示$，为了避免和shell命令冲突，ls $(dirname $x)应写成ls $$(dirname $$x)。
7、避免在genrule中创建软连接以及文件夹。因为Bazel不会复制和检查这些。
8、引用genrule的时候，可以使用genrule的label或者每个genrule输出的label。这两种方法根据实际需要来。
9、genrule 会主动执行，可以用在srcs里面，或者cmd里面类似上面2的用法。

下面这个例子是调用perl脚本来产生一个foo.h文件。

# Runs the create_foo.pl tool and redirects its standard output into foo.h.
genrule(
    name = "foo",
    srcs = [],
    outs = ["foo.h"],
    # $(location create_foo.pl) expands to the tool's path; "$@" is the output file.
    cmd = "./$(location create_foo.pl) > \"$@\"",
    tools = ["create_foo.pl"],
)

接下来的例子是使用filegroup和另一个genrule产生的文件作为src：

# Concatenates every input file into concatenated.txt with `cat`.
genrule(
    name = "concat_all_files",
    srcs = [
        "//some:files",  # a filegroup with multiple files in it ==> $(locations)
        "//other:gen",   # a genrule with a single output ==> $(location)
    ],
    outs = ["concatenated.txt"],
    cmd = "cat $(locations //some:files) $(location //other:gen) > $@",
)

用法9：genrule 会主动执行，可以用在srcs里面

# Creates an empty output.py via `touch`.
genrule(
    name = "wuyongyu_test",
    outs = ["output.py"],
    cmd = "touch $@",
)

# Python binary whose sources include the file generated by the genrule.
py_binary(
    name = "main_build",
    srcs = [
        "platform/main_build.py",
        ":output.py",  # refers directly to the genrule's output file
    ],
    python_version = "PY3",
    srcs_version = "PY2AND3",
    deps = [
        ":_pywrap_cost_analyzer",
    ],
)