diff --git a/.bazelrc b/.bazelrc index 3cabad0b8ca6..3035b0beb395 100644 --- a/.bazelrc +++ b/.bazelrc @@ -10,15 +10,18 @@ common --override_module=semmle_code=%workspace%/misc/bazel/semmle_code_stub build --repo_env=CC=clang --repo_env=CXX=clang++ -build:linux --cxxopt=-std=c++20 +build:linux --cxxopt=-std=c++20 --host_cxxopt=-std=c++20 # we currently cannot built the swift extractor for ARM -build:macos --cxxopt=-std=c++20 --copt=-arch --copt=x86_64 --linkopt=-arch --linkopt=x86_64 -build:windows --cxxopt=/std:c++20 --cxxopt=/Zc:preprocessor +build:macos --cxxopt=-std=c++20 --host_cxxopt=-std=c++20 --copt=-arch --copt=x86_64 --linkopt=-arch --linkopt=x86_64 +build:windows --cxxopt=/std:c++20 --cxxopt=/Zc:preprocessor --host_cxxopt=/std:c++20 --host_cxxopt=/Zc:preprocessor # this requires developer mode, but is required to have pack installer functioning startup --windows_enable_symlinks common --enable_runfiles +# with the above, we can avoid building python zips which is the default on windows as that's expensive +build --nobuild_python_zip + common --registry=file:///%workspace%/misc/bazel/registry common --registry=https://bcr.bazel.build diff --git a/.gitattributes b/.gitattributes index c58cda655f36..22bd4746ab7c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,3 +80,9 @@ csharp/paket.lock linguist-generated=true csharp/.paket/Paket.Restore.targets linguist-generated=true eol=crlf csharp/paket.main.bzl linguist-generated=true csharp/paket.main_extension.bzl linguist-generated=true + +# ripunzip tool +/misc/bazel/internal/ripunzip/ripunzip-* filter=lfs diff=lfs merge=lfs -text + +# swift prebuilt resources +/swift/third_party/resource-dir/*.zip filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/zipmerge-test.yml b/.github/workflows/zipmerge-test.yml new file mode 100644 index 000000000000..edae93a90a00 --- /dev/null +++ b/.github/workflows/zipmerge-test.yml @@ -0,0 +1,23 @@ +name: "Test zipmerge code" + +on: + pull_request: + paths: + - 
"misc/bazel/internal/zipmerge/**" + - "MODULE.bazel" + - ".bazelrc*" + branches: + - main + - "rc/*" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - run: | + bazel test //misc/bazel/internal/zipmerge:test --test_output=all diff --git a/MODULE.bazel b/MODULE.bazel index 424312f828ef..dff694ce0d86 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -24,6 +24,7 @@ bazel_dep(name = "nlohmann_json", version = "3.11.3", repo_name = "json") bazel_dep(name = "fmt", version = "10.0.0") bazel_dep(name = "gazelle", version = "0.36.0") bazel_dep(name = "rules_dotnet", version = "0.15.1") +bazel_dep(name = "googletest", version = "1.14.0.bcr.1") bazel_dep(name = "buildifier_prebuilt", version = "6.4.0", dev_dependency = True) @@ -67,6 +68,36 @@ use_repo(node, "nodejs", "nodejs_toolchains") go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk") go_sdk.download(version = "1.22.2") +lfs_files = use_repo_rule("//misc/bazel:lfs.bzl", "lfs_files") + +lfs_files( + name = "ripunzip-linux", + srcs = ["//misc/bazel/internal/ripunzip:ripunzip-linux"], + executable = True, +) + +lfs_files( + name = "ripunzip-windows", + srcs = ["//misc/bazel/internal/ripunzip:ripunzip-windows.exe"], + executable = True, +) + +lfs_files( + name = "ripunzip-macos", + srcs = ["//misc/bazel/internal/ripunzip:ripunzip-macos"], + executable = True, +) + +lfs_files( + name = "swift-resource-dir-linux", + srcs = ["//swift/third_party/resource-dir:resource-dir-linux.zip"], +) + +lfs_files( + name = "swift-resource-dir-macos", + srcs = ["//swift/third_party/resource-dir:resource-dir-macos.zip"], +) + register_toolchains( "@nodejs_toolchains//:all", ) diff --git a/misc/bazel/internal/BUILD.bazel b/misc/bazel/internal/BUILD.bazel index e69de29bb2d1..d9663e7f0c01 100644 --- a/misc/bazel/internal/BUILD.bazel +++ b/misc/bazel/internal/BUILD.bazel @@ -0,0 +1 @@ +exports_files(["install.py"]) diff --git a/misc/bazel/internal/install.py 
"""
Helper script for installing `codeql_pack` targets.

This mainly wraps around a `pkg_install` script from `rules_pkg` adding:
* resolving destination directory with respect to a provided `--build-file`
* clean-up of target destination directory before a reinstall
* installing imported zip files using a provided `--ripunzip`
"""

import argparse
import pathlib
import shutil
import subprocess


def parse_manifest_entry(line: str) -> tuple[str, str]:
    """Split one `prefix:zip_file` manifest line into `(prefix, zip_file)`.

    Surrounding whitespace is stripped first: lines obtained by iterating over
    a text file keep their trailing newline, which would otherwise end up as
    part of the zip path handed to ripunzip.

    Only the first `:` separates the two parts, so the zip path itself may
    contain further colons (e.g. a Windows drive letter).

    Raises:
        ValueError: if the line has no non-empty `zip_file` part.
    """
    prefix, _, zip_file = line.strip().partition(":")
    if not zip_file:
        raise ValueError(
            f"missing zip file in manifest entry {line.strip()!r}, "
            "you should use prefix:zip format"
        )
    return prefix, zip_file


def main() -> None:
    """Run the wrapped `pkg_install` script, then unpack any imported zips."""
    # Imported here rather than at module level so that the pure helpers of
    # this module stay importable outside of a Bazel runfiles tree.
    from python.runfiles import runfiles

    rf = runfiles.Create()
    if not rf:
        # An explicit exit instead of `assert`, which is stripped under `-O`.
        raise SystemExit("Installer should be run with `bazel run`")

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--destdir", type=pathlib.Path, required=True,
                        help="Destination directory, relative to `--build-file`")
    parser.add_argument("--pkg-install-script", required=True,
                        help="The wrapped `pkg_install` installation script rlocation")
    parser.add_argument("--build-file", required=True,
                        help="BUILD.bazel rlocation relative to which the installation should take place")
    parser.add_argument("--ripunzip",
                        help="ripunzip executable rlocation. Must be provided if `--zip-manifest` is.")
    parser.add_argument("--zip-manifest",
                        help="The rlocation of a file containing newline-separated `prefix:zip_file` entries")
    parser.add_argument("--cleanup", action=argparse.BooleanOptionalAction, default=True,
                        help="Whether to wipe the destination directory before installing (true by default)")
    opts = parser.parse_args()
    if opts.zip_manifest and not opts.ripunzip:
        parser.error("Provide `--ripunzip` when specifying `--zip-manifest`")

    build_file = rf.Rlocation(opts.build_file)
    script = rf.Rlocation(opts.pkg_install_script)
    # Resolve symlinks so that the destination is computed relative to the
    # directory of the resolved build file rather than its runfiles location.
    destdir = pathlib.Path(build_file).resolve().parent / opts.destdir

    if destdir.exists() and opts.cleanup:
        shutil.rmtree(destdir)

    destdir.mkdir(parents=True, exist_ok=True)
    subprocess.run([script, "--destdir", destdir], check=True)

    if opts.zip_manifest:
        ripunzip = rf.Rlocation(opts.ripunzip)
        zip_manifest = rf.Rlocation(opts.zip_manifest)
        with open(zip_manifest) as manifest:
            for line in manifest:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing one) in the manifest.
                    continue
                prefix, zip_file = parse_manifest_entry(line)
                dest = destdir / prefix
                dest.mkdir(parents=True, exist_ok=True)
                subprocess.run([ripunzip, "unzip-file", zip_file, "-d", dest], check=True)


if __name__ == "__main__":
    main()
-0,0 +1,236 @@ +This software is distributed under the terms of both the MIT license and the +Apache License (Version 2.0). + + +MIT license + +Copyright 2022 Google LLC + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + +Apache 2 license + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/misc/bazel/internal/ripunzip/README.md b/misc/bazel/internal/ripunzip/README.md new file mode 100644 index 000000000000..4312f00a9b0b --- /dev/null +++ b/misc/bazel/internal/ripunzip/README.md @@ -0,0 +1,2 @@ +These LFS files are distributions of [ripunzip](https://github.com/google/ripunzip), compiled with this [workflow](https://github.com/github/codeql/actions/workflows/build-ripunzip.yml). +A [copy](./LICENSE.txt) of the ripunzip license is included. 
diff --git a/misc/bazel/internal/ripunzip/ripunzip-linux b/misc/bazel/internal/ripunzip/ripunzip-linux new file mode 100755 index 000000000000..356063894609 --- /dev/null +++ b/misc/bazel/internal/ripunzip/ripunzip-linux @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e444b6efcb11e899ff932dc5846927dd78578d0889386d82aa21133e077fde +size 12423064 diff --git a/misc/bazel/internal/ripunzip/ripunzip-macos b/misc/bazel/internal/ripunzip/ripunzip-macos new file mode 100755 index 000000000000..d80eeea06670 --- /dev/null +++ b/misc/bazel/internal/ripunzip/ripunzip-macos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ff604d47ec88c4a795d307dee9454771589e8bd0b9747c6f49d2a59081f829 +size 10632454 diff --git a/misc/bazel/internal/ripunzip/ripunzip-windows.exe b/misc/bazel/internal/ripunzip/ripunzip-windows.exe new file mode 100755 index 000000000000..44727f650dbd --- /dev/null +++ b/misc/bazel/internal/ripunzip/ripunzip-windows.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e6b68c668a84d1232335524f9ca15dff61f7365ec16d57caa9763fda145f33d +size 4548096 diff --git a/misc/bazel/internal/zipmerge/.clang-format b/misc/bazel/internal/zipmerge/.clang-format new file mode 100644 index 000000000000..ca0a3afd986d --- /dev/null +++ b/misc/bazel/internal/zipmerge/.clang-format @@ -0,0 +1,7 @@ +BasedOnStyle: Chromium +ColumnLimit: 100 +IndentWidth: 2 +SortIncludes: false +AllowShortIfStatementsOnASingleLine: WithoutElse +AlwaysBreakBeforeMultilineStrings: false +Standard: c++20 diff --git a/misc/bazel/internal/zipmerge/BUILD.bazel b/misc/bazel/internal/zipmerge/BUILD.bazel new file mode 100644 index 000000000000..cae83d529211 --- /dev/null +++ b/misc/bazel/internal/zipmerge/BUILD.bazel @@ -0,0 +1,31 @@ +cc_library( + name = "lib", + srcs = [ + "zipmerge.cpp", + ], + hdrs = ["zipmerge.h"], +) + +cc_binary( + name = "zipmerge", + srcs = [ + "zipmerge_main.cpp", + ], + visibility = ["//visibility:public"], + deps 
= [ + ":lib", + ], +) + +cc_test( + name = "test", + size = "small", + srcs = ["zipmerge_test.cpp"], + data = glob(["test-files/*"]), + linkstatic = True, # required to build the test in the internal repo + deps = [ + ":lib", + "@bazel_tools//tools/cpp/runfiles", + "@googletest//:gtest_main", + ], +) diff --git a/misc/bazel/internal/zipmerge/test-files/almost-minimal.zip b/misc/bazel/internal/zipmerge/test-files/almost-minimal.zip new file mode 100644 index 000000000000..44541199b782 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/almost-minimal.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/directory-partial.zip b/misc/bazel/internal/zipmerge/test-files/directory-partial.zip new file mode 100644 index 000000000000..2c65f48180a6 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/directory-partial.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/directory.zip b/misc/bazel/internal/zipmerge/test-files/directory.zip new file mode 100644 index 000000000000..046f0a963b9f Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/directory.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/empty.zip b/misc/bazel/internal/zipmerge/test-files/empty.zip new file mode 100644 index 000000000000..15cb0ecb3e21 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/empty.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/footers.jar b/misc/bazel/internal/zipmerge/test-files/footers.jar new file mode 100644 index 000000000000..0ed06d829770 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/footers.jar differ diff --git a/misc/bazel/internal/zipmerge/test-files/minimal-x3.zip b/misc/bazel/internal/zipmerge/test-files/minimal-x3.zip new file mode 100644 index 000000000000..a9854656fb04 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/minimal-x3.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/minimal.zip 
b/misc/bazel/internal/zipmerge/test-files/minimal.zip new file mode 100644 index 000000000000..6948de0b3e65 Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/minimal.zip differ diff --git a/misc/bazel/internal/zipmerge/test-files/no-footers.jar b/misc/bazel/internal/zipmerge/test-files/no-footers.jar new file mode 100644 index 000000000000..781e3bd5d72d Binary files /dev/null and b/misc/bazel/internal/zipmerge/test-files/no-footers.jar differ diff --git a/misc/bazel/internal/zipmerge/zipmerge.cpp b/misc/bazel/internal/zipmerge/zipmerge.cpp new file mode 100644 index 000000000000..6a663ad7bbc5 --- /dev/null +++ b/misc/bazel/internal/zipmerge/zipmerge.cpp @@ -0,0 +1,529 @@ +/* + Utility for munging zip files. + + The high-level pseudo-code is: + for each input zip Z: + for each file F in Z: + F.name = adjust(F.name) + if F.name should be included: + write F to the output zip + + File inclusion testing consists of two parts: + 1. Don't include anything matching an explicit removal list. + 2. If the same filename occurs in multiple input zips, only include the file from the last input + zip. + + Filename adjustment consists of optionally prepending a prefix to the filename. +*/ + +#include "misc/bazel/internal/zipmerge/zipmerge.h" + +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#define unlink(s) DeleteFileA(s) +#else +#include +#include +#include +#include +#include +#endif + +#include + +namespace { +struct { + FILE* file; + uint32_t num_bytes_written; + uint16_t num_files_written; +} output_zip{}; // The zip file being written. + +struct { + uint8_t* bytes; + uint16_t length; +} filename_prefix{}; // A string to prepend to all filenames added to the output file. + +constexpr size_t maximum_input_files = 1000; +struct { + int count; + struct { + const char* prefix; + const char* name; + } entries[maximum_input_files]; +} input_files; // A list of input zip files. 
+ +static bool verbose; // If true, more things are written to stdout. +static const char* output_file_name; // The name of the output zip file. +static const char* + current_input_file_name; // The name of the current input zip file (used for diagnostics). + +constexpr size_t filename_hash_table_size = 0x20000; +typedef struct { + uint32_t hash; + uint32_t len; + const uint8_t* data; +} hash_entry_t; + +// A hash set containing the name of everything so far written to the output file. +static hash_entry_t filename_hash_table[filename_hash_table_size]; + +constexpr size_t maximum_removals = 1000; +struct removal_entry { + // A removal entry can either be a literal string, or a wildcard containing a single "*". + // In the former case, the literal string is called the head. In the latter case, the + // segment before the "*" is called the head, and the segment after the "*" is called the tail. + uint32_t head_len; + uint32_t tail_len; // zero for literal removals, possibly zero for wildcard removals + const uint8_t* head; + const uint8_t* tail; // NULL for literal removals, non-NULL for wildcard removals +}; + +struct { + int count; + removal_entry entries[maximum_removals]; +} removals; // A list of files and directories to ignore in input files. + +// Sizes and signatures of zip file structures (central-directory, local-file-header, +// end-of-central-directory). +constexpr size_t cd_size = 46; +constexpr std::string_view cd_signature = "\x50\x4b\x01\x02"; +constexpr size_t lfh_size = 30; +constexpr std::string_view lfh_signature = "\x50\x4b\x03\x04"; +constexpr size_t eocd_size = 22; + +// Write the bytes [src, src + len) to the output file. 
+void append_data(const uint8_t* src, uint32_t len) { + if (fwrite(src, 1, len, output_zip.file) != len) { + printf("Error: Could not write %lu bytes to output file.\n", (unsigned long)len); + exit(1); + } + uint32_t new_output_size = output_zip.num_bytes_written + len; + if (new_output_size < output_zip.num_bytes_written) { + printf("Error: Output zip file exceeds 4 gigabytes.\n"); + exit(1); + } + output_zip.num_bytes_written = new_output_size; +} +} // namespace + +void append_cd(const uint8_t* src, uint32_t len) { + if ((output_cd.capacity - output_cd.length) < len) { + uint32_t new_capacity; + uint8_t* new_data; + + new_capacity = output_cd.capacity + (output_cd.capacity >> 1); + if (new_capacity < output_cd.length + len) new_capacity = output_cd.length + len; + new_data = (uint8_t*)realloc(output_cd.bytes, new_capacity); + if (!new_data) { + printf("Error: Could not grow central-directory buffer from %lu bytes to %lu bytes.\n", + (unsigned long)output_cd.capacity, (unsigned long)new_capacity); + exit(1); + } + output_cd.bytes = new_data; + output_cd.capacity = new_capacity; + } + memcpy(output_cd.bytes + output_cd.length, src, len); + output_cd.length += len; +} + +namespace { +// Copy a local-file-header and accompanying file data from an input file to the output file. +// The input file is [input_file, input_file + input_file_len). +// The offset within the input file of the local-file-header is given by lfh_offset. +// The central-directory entry corresponding to the file is given by cd. 
+void copy_file_data(const uint8_t* input_file, + size_t lfh_offset, + const uint8_t* cd, + size_t input_file_len) { + if (lfh_offset >= input_file_len || (size_t)(input_file_len - lfh_offset) < lfh_size) { + printf("Error: %s is invalid; central-directory references local-file-header at offset %llu, " + "but file is only %llu bytes.\n", + current_input_file_name, (unsigned long long)lfh_offset, + (unsigned long long)input_file_len); + exit(1); + } + + const uint8_t* lfh = input_file + lfh_offset; + if (memcmp(lfh, lfh_signature.data(), lfh_signature.size()) != 0) { + printf("Error: Expected local-file-header signature at offset %llu of %s, but instead got %02x " + "%02x %02x %02x.\n", + (unsigned long long)lfh_offset, current_input_file_name, lfh[0], lfh[1], lfh[2], lfh[3]); + exit(1); + } + + size_t data_offset = lfh_offset + lfh_size; + uint16_t name_len = read2(lfh + 26); + uint16_t extra_len = read2(lfh + 28); + uint32_t data_len = read4(cd + 20); + append_data(lfh, 6); // signature, version + // flags, compression, mod-time, mod-date, crc-32, compressed-size, uncompressed-size, name-len + append_data(cd + 8, 22); + append_data(lfh + 28, 2); // extra-len + + size_t total_variable_len = (size_t)name_len + (size_t)extra_len + (size_t)data_len; + if ((size_t)(input_file_len - data_offset) < total_variable_len) { + printf( + "Error: %s is invalid; starting at offset %llu, reading a filename of %u bytes, extra data " + "of %u bytes, and %lu bytes of compressed data would exceed file size of %llu bytes.\n", + current_input_file_name, (unsigned long long)data_offset, (unsigned)name_len, + (unsigned)extra_len, (unsigned long)data_len, (unsigned long long)input_file_len); + exit(1); + } + append_data(filename_prefix.bytes, filename_prefix.length); + append_data(input_file + data_offset, (uint32_t)total_variable_len); +} + +bool removal_entry_matches(const struct removal_entry* re, const uint8_t* full_name, uint32_t len) { + if (len < re->head_len + re->tail_len) { + 
return false; + } + if (memcmp(full_name, re->head, re->head_len) != 0) { + return false; + } + if (re->tail) { + for (uint32_t i = re->head_len + re->tail_len;; ++i) { + if (len == i || full_name[i] == '/') { + if (memcmp(full_name + i - re->tail_len, re->tail, re->tail_len) == 0) { + return true; + } + } + if (len == i || full_name[i - re->tail_len] == '/') { + return false; + } + } + } else { + return len == re->head_len || full_name[re->head_len] == '/'; + } +} +} // namespace + +bool should_include_filename_now(const uint8_t* name, uint32_t len) { + uint8_t* full_name = (uint8_t*)malloc(filename_prefix.length + len + 1); + memcpy(full_name, filename_prefix.bytes, filename_prefix.length); + memcpy(full_name + filename_prefix.length, name, len); + len += filename_prefix.length; + + for (int i = 0; i < removals.count; ++i) { + if (removal_entry_matches(&removals.entries[i], full_name, len)) { + free(full_name); + return false; + } + } + + uint32_t hash = 5381; + for (uint32_t i = 0; i < len; ++i) + hash = hash * 33 ^ full_name[i]; + + for (uint32_t idx = hash;; ++idx) { + hash_entry_t* e = filename_hash_table + (idx & (filename_hash_table_size - 1)); + if (e->hash == hash && e->len == len && memcmp(e->data, full_name, len) == 0) { + free(full_name); + return false; + } else if (e->data == NULL) { + e->hash = hash; + e->len = len; + e->data = full_name; + return true; + } + } +} + +// Try to find the end-of-central-directory record in a zip file. +const uint8_t* find_eocd(const uint8_t* input_file, size_t input_file_len) { + for (size_t i = eocd_size; i < 1024 + eocd_size && i <= input_file_len; ++i) { + const uint8_t* candidate = input_file + input_file_len - i; + if (memcmp(candidate, eocd_signature.data(), eocd_signature.size()) == 0) { + return candidate; + } + } + return NULL; +} + +namespace { +// Copy all appropriate files from an input zip to the output zip. 
+// Walks the central directory of the input: every entry accepted by should_include_filename_now
+// has its (adjusted) record appended to the output central directory and its data copied to the
+// output. Any structural problem in the input is reported on stdout and ends the process with
+// exit status 1.
+void process_input_file(const uint8_t* input_file, size_t input_file_len) {
+  const uint8_t* eocd = find_eocd(input_file, input_file_len);
+  if (!eocd) {
+    printf("Error: Could not find end-of-central-directory in %s.\n", current_input_file_name);
+    exit(1);
+  }
+  if (read2(eocd + 4) != 0 || read2(eocd + 6) != 0) {
+    printf("Error: %s is split over multiple disks, which is not supported.\n",
+           current_input_file_name);
+    exit(1);
+  }
+  // An all-ones value in any of these fields is treated as the zip64 marker.
+  if (!(uint16_t)~read2(eocd + 8) || !(uint16_t)~read2(eocd + 10) || !~read4(eocd + 12) ||
+      !~read4(eocd + 16)) {
+    printf("Error: %s is zip64, which is not supported.\n", current_input_file_name);
+    exit(1);
+  }
+  uint16_t num_entries = read2(eocd + 10);
+  size_t cd_offset = read4(eocd + 16);
+
+  for (uint16_t i = 0; i < num_entries; ++i) {
+    // Work on a private copy of the fixed-size record, since several fields are rewritten below.
+    uint8_t cd[cd_size];
+    if (cd_offset >= input_file_len || (size_t)(input_file_len - cd_offset) < sizeof(cd)) {
+      printf("Error: %s is invalid; central-directory %u/%u would start at offset %llu, but file "
+             "is only %llu bytes.\n",
+             current_input_file_name, (unsigned)i, (unsigned)num_entries,
+             (unsigned long long)cd_offset, (unsigned long long)input_file_len);
+      exit(1);
+    }
+
+    memcpy(cd, input_file + cd_offset, sizeof(cd));
+    if (memcmp(cd, cd_signature.data(), cd_signature.size()) != 0) {
+      printf("Error: Expected central-directory signature at offset %llu of %s, but instead got "
+             "%02x %02x %02x %02x.\n",
+             (unsigned long long)cd_offset, current_input_file_name, cd[0], cd[1], cd[2], cd[3]);
+      exit(1);
+    }
+    cd[8] &= 0xF7;  // Clear the bit indicating that a local-file-footer follows the file data
+    cd_offset += sizeof(cd);
+
+    uint16_t name_len = read2(cd + 28);
+    uint16_t extra_len = read2(cd + 30);
+    uint16_t comment_len = read2(cd + 32);
+
+    // Validate the variable-length part first: the diagnostics below print name_len bytes
+    // starting at input_file + cd_offset, so the name must be known to lie within the input
+    // before any of them runs.
+    size_t total_variable_len = (size_t)name_len + (size_t)extra_len + (size_t)comment_len;
+    if ((size_t)(input_file_len - cd_offset) < total_variable_len) {
+      // Report cd_offset, which is the offset this check is about.
+      printf("Error: %s is invalid; starting at offset %llu, reading a filename of %u bytes, extra "
+             "data of %u bytes, and comment of %u bytes exceed file size of %llu bytes.\n",
+             current_input_file_name, (unsigned long long)cd_offset, (unsigned)name_len,
+             (unsigned)extra_len, (unsigned)comment_len, (unsigned long long)input_file_len);
+      exit(1);
+    }
+
+    if (((uint32_t)name_len + (uint32_t)filename_prefix.length) > 0xFFFFU) {
+      printf("Error: Combining prefix of %.*s with filename of %.*s results in a filename which is "
+             "too long.\n",
+             (int)filename_prefix.length, (const char*)filename_prefix.bytes, (int)name_len,
+             (const char*)(input_file + cd_offset));
+      exit(1);
+    }
+    // The record written to the output carries the prefixed name length and the position the
+    // entry's local-file-header will have in the output.
+    write2(cd + 28, name_len + filename_prefix.length);
+    uint32_t offset = read4(cd + 42);
+    write4(cd + 42, output_zip.num_bytes_written);
+    if (!~offset || !~read4(cd + 20)) {
+      printf("Error: %s is zip64 (because of %.*s), which is not supported.\n",
+             current_input_file_name, (int)name_len, (const char*)(input_file + cd_offset));
+      exit(1);
+    }
+
+    bool should_include = should_include_filename_now(input_file + cd_offset, name_len);
+    if (verbose) {
+      printf("%s %.*s from %s\n", should_include ? "Using" : "Skipping", (int)name_len,
+             (const char*)(input_file + cd_offset), current_input_file_name);
+    }
+    if (should_include) {
+      append_cd(cd, sizeof(cd));
+      append_cd(filename_prefix.bytes, filename_prefix.length);
+      append_cd(input_file + cd_offset, (uint32_t)total_variable_len);
+      copy_file_data(input_file, offset, cd, input_file_len);
+      if (output_zip.num_files_written == 0xFFFFU) {
+        printf("Error: Too many files in output zip.\n");
+        exit(1);
+      }
+      ++output_zip.num_files_written;
+    }
+    // Advance to the next record whether or not this one was used.
+    cd_offset += total_variable_len;
+  }
+}
+
+// Read a file into memory and pass it to process_input_file.
+// The whole file is mapped read-only (CreateFileMapping/MapViewOfFile on Windows, mmap
+// elsewhere) and unmapped again once process_input_file returns. Any failure prints an error and
+// exits with status 1.
+void read_and_process_input_file(const char* filename) {
+#ifdef _WIN32
+  HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+                            FILE_ATTRIBUTE_NORMAL, NULL);
+  if (file == INVALID_HANDLE_VALUE) {
+    printf("Error: Cannot open %s for reading.\n", filename);
+    exit(1);
+  }
+  LARGE_INTEGER size;
+  if (!GetFileSizeEx(file, &size)) {
+    printf("Error: Cannot determine size of %s.\n", filename);
+    exit(1);
+  }
+  // Only the low 32 bits of the size are used below, so larger files are rejected here.
+  if (size.HighPart != 0) {
+    printf("Error: Input file %s exceeds 4 gigabytes.\n", filename);
+    exit(1);
+  }
+  if (size.LowPart == 0) {
+    printf("Error: Input file %s is empty.\n", filename);
+    exit(1);
+  }
+  HANDLE mapping = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, size.LowPart, NULL);
+  if (mapping == NULL) {
+    printf("Error: Cannot mmap %s (CreateFileMapping).\n", filename);
+    exit(1);
+  }
+  void* data = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, size.LowPart);
+  if (data == NULL) {
+    printf("Error: Cannot mmap %s (MapViewOfFile).\n", filename);
+    exit(1);
+  }
+  process_input_file((uint8_t*)data, size.LowPart);
+  // Release in reverse order of acquisition: view, mapping object, file handle.
+  UnmapViewOfFile(data);
+  CloseHandle(mapping);
+  CloseHandle(file);
+#else
+  int file = open(filename, O_RDONLY);
+  if (file == -1) {
+    printf("Error: Cannot open %s for reading.\n", filename);
+    exit(1);
+  }
+  struct stat st;
+  if (fstat(file, &st) == -1) {
+    printf("Error: Cannot stat %s.\n", filename);
+    exit(1);
+  }
+  // Unlike the Windows branch, the size is passed to mmap without an explicit empty-file or
+  // 4-gigabyte check.
+  void* data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, file, 0);
+  if (data == MAP_FAILED) {
+    printf("Error: Cannot mmap %s.\n", filename);
+    exit(1);
+  }
+  process_input_file((uint8_t*)data, st.st_size);
+  munmap(data, st.st_size);
+  close(file);
+#endif
+}
+
+// Print usage information and exit.
+// argv[0] is used as the program name in the message; the exit status is 1.
+void usage_and_exit(const char** argv) {
+  printf("Usage: %s [-v|--verbose] [--remove=FILE] outfile.zip [--prefix=PREFIX] infile1.zip "
+         "[--prefix=PREFIX] infile2.zip ...\n",
+         argv[0]);
+  exit(1);
+}
+
+// Set filename_prefix based on a string from the command line.
+// A NULL or empty prefix clears filename_prefix. Otherwise one leading '/' or '\' is dropped,
+// every '\' becomes '/', and a trailing '/' is appended. The stored bytes are not
+// NUL-terminated; filename_prefix.length is the only record of their size.
+void set_filename_prefix(const char* prefix) {
+  free(filename_prefix.bytes);
+  filename_prefix.bytes = NULL;
+  filename_prefix.length = 0;
+
+  if (prefix == NULL) {
+    return;
+  }
+  if (*prefix == '/' || *prefix == '\\') {
+    ++prefix;
+  }
+  size_t len = strlen(prefix);
+  if (len == 0) {
+    return;
+  }
+
+  // One extra byte for the trailing '/'.
+  filename_prefix.bytes = (uint8_t*)malloc(len + 1);
+  memcpy(filename_prefix.bytes, prefix, len);
+  for (size_t i = 0; i < len; ++i) {
+    if (filename_prefix.bytes[i] == '\\') filename_prefix.bytes[i] = '/';
+  }
+  filename_prefix.bytes[len] = '/';
+  filename_prefix.length = (uint16_t)(len + 1);
+}
+
+// Set various global variables based on the command line.
+// Leading flags (-v/--verbose, --remove=...) are consumed first, the next argument is the output
+// file name, and the rest are input files interleaved with --prefix=... flags. A prefix applies
+// to every input file that follows it until another --prefix= is given.
+void parse_command_line(int argc, const char** argv) {
+  int i = 1;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strcmp(arg, "-v") == 0 || strcmp(arg, "--verbose") == 0) {
+      verbose = true;
+    } else if (strncmp(arg, "--remove=", 9) == 0) {
+      arg += 9;
+      if (*arg == '/' || *arg == '\\') ++arg;
+      if (removals.count == maximum_removals) {
+        printf("Error: Too many --remove flags.\n");
+        exit(1);
+      }
+      const char* star = strchr(arg, '*');
+      // Fill the next free slot. removals.count is advanced exactly once, at the end of this
+      // branch; advancing it here as well would leave every other slot blank and could step over
+      // the maximum_removals check above.
+      struct removal_entry* re = &removals.entries[removals.count];
+      if (star == NULL) {
+        // No wildcard: the whole pattern is the head.
+        re->head_len = (uint32_t)strlen(arg);
+        re->tail_len = 0;
+        re->head = (const uint8_t*)arg;
+        re->tail = NULL;
+      } else {
+        if (strchr(star + 1, '*')) {
+          printf("Error: At most one * is permitted per removal (%s).\n", arg);
+          exit(1);
+        }
+        // Split around the single '*': head is what precedes it, tail what follows it.
+        re->head_len = (uint32_t)(star - arg);
+        re->tail_len = (uint32_t)strlen(star + 1);
+        re->head = (const uint8_t*)arg;
+        re->tail = (const uint8_t*)(star + 1);
+      }
+      ++removals.count;
+    } else {
+      break;
+    }
+  }
+
+  if (i == argc) {
+    printf("Error: Missing output file name.\n");
+    usage_and_exit(argv);
+  }
+  output_file_name = argv[i];
+  ++i;
+
+  const char* prefix = NULL;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strncmp(arg, "--prefix=", 9) == 0) {
+      prefix = arg + 9;
+    } else {
+      if (input_files.count == maximum_input_files) {
+        printf("Error: Too many input files.\n");
+        exit(1);
+      }
+      input_files.entries[input_files.count].prefix = prefix;
+      input_files.entries[input_files.count].name = arg;
+      ++input_files.count;
+    }
+  }
+
+  if (input_files.count <= 0) {
+    printf("Error: Missing input file names.\n");
+    usage_and_exit(argv);
+  }
+}
+}  // namespace
+
+// Entry point: parses the command line, merges the input zips into the output file and returns 0,
+// or returns 1 if the output file cannot be opened. Other errors exit the process directly.
+int zipmerge_main(int argc, const char** argv) {
+  parse_command_line(argc, argv);
+
+  output_zip.file = fopen(output_file_name, "wb");
+  if (!output_zip.file) {
+    printf("Error: Cannot open %s for writing.\n", output_file_name);
+    return 1;
+  }
+
+  // Inputs are processed from last to first.
+  for (int i = input_files.count - 1; i >= 0; --i) {
+    set_filename_prefix(input_files.entries[i].prefix);
+    current_input_file_name = input_files.entries[i].name;
+    read_and_process_input_file(current_input_file_name);
+  }
+
+  // Finish the archive: the accumulated central directory, then the end-of-central-directory
+  // record. The central-directory offset is captured before the directory itself is appended.
+  uint8_t eocd[eocd_size] = {0};
+  memcpy(eocd, eocd_signature.data(), eocd_signature.size());
+  write2(eocd + 8, output_zip.num_files_written);
+  write2(eocd + 10, output_zip.num_files_written);
+  write4(eocd + 12, output_cd.length);
+  write4(eocd + 16, output_zip.num_bytes_written);
+  append_data(output_cd.bytes, output_cd.length);
+  append_data(eocd, sizeof(eocd));
+  fclose(output_zip.file);
+  return 0;
+}
+
+// Zero the global state listed below so that zipmerge_main can be called again in the same
+// process. Buffers referenced by that state are not freed.
+void reset() {
+  memset(&output_zip, 0, sizeof(output_zip));
+  memset(&filename_prefix, 0, sizeof(filename_prefix));
+  memset(&output_cd, 0, sizeof(output_cd));
+  memset(&input_files, 0, sizeof(input_files));
+  memset(&filename_hash_table, 0, sizeof(filename_hash_table));
+  memset(&removals, 0, sizeof(removals));
+}
diff --git a/misc/bazel/internal/zipmerge/zipmerge.h b/misc/bazel/internal/zipmerge/zipmerge.h
new file mode 100644
index 000000000000..096f080faf94
--- /dev/null
+++ b/misc/bazel/internal/zipmerge/zipmerge.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
+struct output_cd_t {
+  uint8_t* bytes;
+  uint32_t length;
+  uint32_t capacity;
+};
+
+inline output_cd_t output_cd{};  // An in-memory buffer in which the
central-directory records for + // the output file are accumulated. + +// Read and write little-endian integers (as the only supported host platforms are little-endian, +// and all host platforms support unaligned memory accesses, these macros are currently very +// simple). +#define read2(ptr) (*(uint16_t*)(ptr)) +#define read4(ptr) (*(uint32_t*)(ptr)) +#define write2(ptr, val) (*(uint16_t*)(ptr) = (val)) +#define write4(ptr, val) (*(uint32_t*)(ptr) = (val)) + +// Add the bytes [src, src + len) to the output's central-directory. +void append_cd(const uint8_t* src, uint32_t len); + +// Test whether a given filename should be included in the output zip. +// Note that if a call returns true for a given filename, all future calls with the same filename +// will return false. +bool should_include_filename_now(const uint8_t* name, uint32_t len); + +inline constexpr std::string_view eocd_signature = "\x50\x4b\x05\x06"; +const uint8_t* find_eocd(const uint8_t* input_file, size_t input_file_len); + +int zipmerge_main(int argc, const char** argv); + +void reset(); diff --git a/misc/bazel/internal/zipmerge/zipmerge_main.cpp b/misc/bazel/internal/zipmerge/zipmerge_main.cpp new file mode 100644 index 000000000000..eeb9133d9c90 --- /dev/null +++ b/misc/bazel/internal/zipmerge/zipmerge_main.cpp @@ -0,0 +1,5 @@ +#include "misc/bazel/internal/zipmerge/zipmerge.h" + +int main(int argc, const char** argv) { + return zipmerge_main(argc, argv); +} diff --git a/misc/bazel/internal/zipmerge/zipmerge_test.cpp b/misc/bazel/internal/zipmerge/zipmerge_test.cpp new file mode 100644 index 000000000000..ee30b764a6d2 --- /dev/null +++ b/misc/bazel/internal/zipmerge/zipmerge_test.cpp @@ -0,0 +1,166 @@ +#include "misc/bazel/internal/zipmerge/zipmerge.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "tools/cpp/runfiles/runfiles.h" + +using bazel::tools::cpp::runfiles::Runfiles; +using namespace std::string_literals; +namespace fs = std::filesystem; + 
+namespace codeql_testing {
+
+TEST(Zipmerge, ReadAndWrite) {
+  char buf[7] = {0};
+  write2(buf + 1, 0xF2F1U);
+  write4(buf + 3, 0xF6F5F4F3UL);
+  // Compare all 7 bytes explicitly: the buffer starts with a NUL byte, so a C-string comparison
+  // would stop before looking at anything that was written.
+  EXPECT_EQ(std::string_view(buf, sizeof(buf)),
+            std::string_view("\x00\xF1\xF2\xF3\xF4\xF5\xF6", 7));
+  EXPECT_EQ(read2(buf + 1), 0xF2F1U);
+  EXPECT_EQ(read4(buf + 3), 0xF6F5F4F3UL);
+}
+
+TEST(Zipmerge, AppendCd) {
+  output_cd.length = 0;
+  append_cd((const uint8_t*)"a", 1);
+  append_cd((const uint8_t*)"bcd", 3);
+  append_cd((const uint8_t*)"efghijklmno", 11);
+  EXPECT_EQ(output_cd.length, 15);
+  std::string_view bytes{reinterpret_cast<const char*>(output_cd.bytes), 15};
+  EXPECT_EQ(bytes, "abcdefghijklmno");
+}
+
+TEST(Zipmerge, ShouldIncludeFilenameNow) {
+  EXPECT_TRUE(should_include_filename_now((const uint8_t*)"x", 1));
+  EXPECT_FALSE(should_include_filename_now((const uint8_t*)"x", 1));
+  EXPECT_TRUE(should_include_filename_now((const uint8_t*)"y", 1));
+  EXPECT_TRUE(should_include_filename_now((const uint8_t*)"yy", 2));
+  EXPECT_FALSE(should_include_filename_now((const uint8_t*)"x", 1));
+  EXPECT_FALSE(should_include_filename_now((const uint8_t*)"yy", 2));
+}
+
+TEST(Zipmerge, FindEocd) {
+  uint8_t buf[500] = {0};
+  // Fill the buffer with a counting pattern so it is not all zeros around the two signatures.
+  auto i = 0u;
+  for (auto& b : buf) {
+    b = i++ % 256;
+  }
+  // Two signatures are planted; the one closer to the end of the buffer must be found.
+  memcpy(buf + 17, eocd_signature.data(), eocd_signature.size());
+  memcpy(buf + 101, eocd_signature.data(), eocd_signature.size());
+  EXPECT_EQ(find_eocd(buf, sizeof(buf)), buf + 101);
+}
+
+// Return the whole contents of `filename`, or an empty string (after recording a test failure)
+// if it cannot be opened.
+std::string read_file(const std::string& filename) {
+  std::ifstream f(filename, std::ios::binary);
+  EXPECT_TRUE(f) << "Could not open '" << filename << "' (" << std::strerror(errno) << ")";
+  if (!f) {
+    return {};
+  }
+  std::stringstream contents;
+  contents << f.rdbuf();
+  return contents.str();
+}
+
+// Resolve a test data file by name through Bazel runfiles; records a test failure and returns an
+// empty string if it is not found under any of the known repository prefixes.
+std::string get_file(const char* name) {
+  static auto runfiles = [] {
+    std::string error;
+    auto ret = Runfiles::CreateForTest(&error);
+    EXPECT_TRUE(ret) << error;
+    return ret;
+  }();
+  // this works from both `codeql` and the internal repository
+  for (auto prefix : {"_main", "codeql~"}) {
+    auto ret = runfiles->Rlocation(prefix + "/misc/bazel/internal/zipmerge/test-files/"s + name);
+    if (fs::exists(ret)) {
+      return ret;
+    }
+  }
+  EXPECT_TRUE(false) << "test file " << name << " not found";
+  return "";
+}
+
+// Assert that the file at path `actual` has the same bytes as the test data file `expected`.
+// The actual file is deleted before the comparison is evaluated.
+void expect_same_file(const char* actual, const char* expected) {
+  auto expected_file = get_file(expected);
+  auto actual_contents = read_file(actual);
+  unlink(actual);  // If tests start failing, you might want to comment out this unlink in order to
+                   // inspect the output.
+  ASSERT_EQ(actual_contents, read_file(expected_file))
+      << "contents of " << actual << " do not match contents of " << expected_file;
+}
+
+// Run zipmerge_main with the given arguments and return the output file name.
+// Leading arguments starting with "-" are passed through as flags, the next argument is the
+// output file, and each remaining argument is either a flag (passed through) or the name of a
+// test data file, which is resolved with get_file.
+template <typename... Args>
+const char* zipmerge(Args*... inputs) {
+  reset();
+  const char* output = nullptr;
+  std::vector<std::string> args{"self"};
+  std::array<const char*, sizeof...(Args)> flags{{inputs...}};
+  auto i = 0u;
+  for (; i < flags.size() && std::string_view{flags[i]}.starts_with("-"); ++i) {
+    args.push_back(flags[i]);
+  }
+  output = flags[i];
+  args.push_back(output);
+  ++i;
+  for (; i < flags.size(); ++i) {
+    args.push_back(std::string_view{flags[i]}.starts_with("-") ? flags[i] : get_file(flags[i]));
+  }
+  // argv borrows the character data owned by `args`, which outlives the call below.
+  std::vector<const char*> argv;
+  std::transform(args.begin(), args.end(), std::back_inserter(argv),
+                 [](const std::string& s) { return s.c_str(); });
+  EXPECT_EQ(zipmerge_main(argv.size(), argv.data()), 0);
+  return output;
+}
+
+TEST(Zipmerge, Identity) {
+  expect_same_file(zipmerge("out.zip", "directory.zip"), "directory.zip");
+}
+
+TEST(Zipmerge, Idempotent) {
+  expect_same_file(zipmerge("out.zip", "directory.zip", "directory.zip", "directory.zip"),
+                   "directory.zip");
+}
+
+TEST(Zipmerge, RemoveEverything) {
+  expect_same_file(zipmerge("--remove=directory", "out.zip", "directory.zip"), "empty.zip");
+}
+
+TEST(Zipmerge, RemoveEverythingWildcard) {
+  expect_same_file(zipmerge("--remove=*ory", "out.zip", "directory.zip"), "empty.zip");
+}
+
+TEST(Zipmerge, RemovePrefixedPaths) {
+  expect_same_file(zipmerge("--remove=My/directory", "out.zip", "--prefix=My", "directory.zip"),
+                   "empty.zip");
+}
+TEST(Zipmerge, RemoveSome) {
+  expect_same_file(
+      zipmerge("--remove=directory/b.txt", "--remove=directory/c.txt", "out.zip", "directory.zip"),
+      "directory-partial.zip");
+}
+
+TEST(Zipmerge, RemoveSomeWildcard) {
+  expect_same_file(zipmerge("--remove=directory/b*t", "--remove=directory/c*", "--remove=dir*t",
+                            "out.zip", "directory.zip"),
+                   "directory-partial.zip");
+}
+
+TEST(Zipmerge, Prefix) {
+  expect_same_file(
+      zipmerge("out.zip", "minimal.zip", "--prefix=a", "minimal.zip", "--prefix=b", "minimal.zip"),
+      "minimal-x3.zip");
+}
+
+TEST(Zipmerge, InputFileOrder) {
+  expect_same_file(zipmerge("out.zip", "minimal.zip", "almost-minimal.zip"), "almost-minimal.zip");
+}
+
+TEST(Zipmerge, LocalFileFooters) {
+  expect_same_file(zipmerge("out.jar", "footers.jar"), "no-footers.jar");
+}
+}  // namespace codeql_testing
diff --git a/misc/bazel/lfs.bzl b/misc/bazel/lfs.bzl
index 3a496ea9530c..a068d76b2eae 100644
--- a/misc/bazel/lfs.bzl
+++ b/misc/bazel/lfs.bzl
@@ -1,4 +1,4 @@
-def lfs_smudge(repository_ctx, srcs, extract = False, stripPrefix = None):
+def lfs_smudge(repository_ctx, srcs, *, extract = False, stripPrefix = None, executable = False): python = repository_ctx.which("python3") or repository_ctx.which("python") if not python: fail("Neither python3 nor python executables found") @@ -25,7 +25,7 @@ def lfs_smudge(repository_ctx, srcs, extract = False, stripPrefix = None): repository_ctx.symlink(src, src.basename) else: repository_ctx.report_progress("trying cache for remote %s" % src.basename) - res = repository_ctx.download([], src.basename, sha256 = info, allow_fail = True) + res = repository_ctx.download([], src.basename, sha256 = info, allow_fail = True, executable = executable) if not res.success: remote.append(src) if remote: @@ -33,7 +33,7 @@ def lfs_smudge(repository_ctx, srcs, extract = False, stripPrefix = None): for src, info in zip(remote, infos): sha256, _, url = info.partition(" ") repository_ctx.report_progress("downloading remote %s" % src.basename) - repository_ctx.download(url, src.basename, sha256 = sha256) + repository_ctx.download(url, src.basename, sha256 = sha256, executable = executable) if extract: for src in srcs: repository_ctx.report_progress("extracting %s" % src.basename) @@ -62,19 +62,20 @@ def _download_lfs(repository_ctx): if not dir.is_dir: fail("`dir` not a directory in @%s" % repository_ctx.name) srcs = [f for f in dir.readdir() if not f.is_dir] - lfs_smudge(repository_ctx, srcs) + lfs_smudge(repository_ctx, srcs, executable = repository_ctx.attr.executable) # with bzlmod the name is qualified with `~` separators, and we want the base name here name = repository_ctx.name.split("~")[-1] - repository_ctx.file("BUILD.bazel", """ -exports_files({files}) + basenames = [src.basename for src in srcs] + build = "exports_files(%s)\n" % repr(basenames) -filegroup( - name = "{name}", - srcs = {files}, - visibility = ["//visibility:public"], -) -""".format(name = name, files = repr([src.basename for src in srcs]))) + # add a main `name` filegroup only if it doesn't conflict with 
existing exported files + if name not in basenames: + build += 'filegroup(name = "%s", srcs = %s, visibility = ["//visibility:public"])\n' % ( + name, + basenames, + ) + repository_ctx.file("BUILD.bazel", build) lfs_archive = repository_rule( doc = "Export the contents from an on-demand LFS archive. The corresponding path should be added to be ignored " + @@ -98,5 +99,6 @@ lfs_files = repository_rule( "srcs": attr.label_list(doc = "Local paths to the LFS files to export."), "dir": attr.label(doc = "Local path to a directory containing LFS files to export. Only the direct contents " + "of the directory are exported"), + "executable": attr.bool(doc = "Whether files should be marked as executable"), }, ) diff --git a/misc/bazel/pkg.bzl b/misc/bazel/pkg.bzl index 0e01c4e3a9cc..fdfdb6be746b 100644 --- a/misc/bazel/pkg.bzl +++ b/misc/bazel/pkg.bzl @@ -1,4 +1,434 @@ +""" +Wrappers and helpers around `rules_pkg` to build codeql packs. +""" + +load("@rules_pkg//pkg:install.bzl", "pkg_install") +load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_filegroup", "pkg_files", _strip_prefix = "strip_prefix") +load("@rules_pkg//pkg:pkg.bzl", "pkg_zip") load("@rules_pkg//pkg:providers.bzl", "PackageFilegroupInfo", "PackageFilesInfo") +load("@rules_python//python:defs.bzl", "py_binary") + +def _make_internal(name): + def internal(suffix = "internal", *args): + args = (name, suffix) + args + return "-".join(args) + + return internal + +_PLAT_DETECTION_ATTRS = { + "_windows": attr.label(default = "@platforms//os:windows"), + "_macos": attr.label(default = "@platforms//os:macos"), +} + +_PLAT_PLACEHOLDER = "{CODEQL_PLATFORM}" + +def _expand_path(path, platform): + if _PLAT_PLACEHOLDER in path: + path = path.replace(_PLAT_PLACEHOLDER, platform) + return ("arch", path) + return ("generic", path) + +def _platform_select( + ctx = None, + *, + linux, + windows, + macos): + if ctx: + if ctx.target_platform_has_constraint(ctx.attr._windows[platform_common.ConstraintValueInfo]): + 
return windows + elif ctx.target_platform_has_constraint(ctx.attr._macos[platform_common.ConstraintValueInfo]): + return macos + else: + return linux + else: + return select({ + "@platforms//os:linux": linux, + "@platforms//os:macos": macos, + "@platforms//os:windows": windows, + }) + +def _detect_platform(ctx = None): + return _platform_select(ctx, linux = "linux64", macos = "osx64", windows = "win64") + +def codeql_pkg_files( + *, + name, + srcs = None, + exes = None, + visibility = None, + **kwargs): + """ Wrapper around `pkg_files` adding a distinction between `srcs` and `exes`, where the + latter will get executable permissions. + """ + + internal = _make_internal(name) + if "attributes" in kwargs: + fail("do not use attributes with codeql_pkg_* rules. Use `exes` to mark executable files.") + internal_srcs = [] + if srcs and exes: + pkg_files( + name = internal("srcs"), + srcs = srcs, + visibility = ["//visibility:private"], + **kwargs + ) + pkg_files( + name = internal("exes"), + srcs = exes, + visibility = ["//visibility:private"], + attributes = pkg_attributes(mode = "755"), + **kwargs + ) + pkg_filegroup( + name = name, + srcs = [internal("srcs"), internal("exes")], + visibility = visibility, + ) + else: + pkg_files( + name = name, + srcs = srcs or exes, + visibility = visibility, + attributes = pkg_attributes(mode = "755") if exes else None, + **kwargs + ) + +def _extract_pkg_filegroup_impl(ctx): + src = ctx.attr.src[PackageFilegroupInfo] + platform = _detect_platform(ctx) + + if src.pkg_dirs or src.pkg_symlinks: + fail("`pkg_dirs` and `pkg_symlinks` are not supported for codeql packaging rules") + + pkg_files = [] + for pfi, origin in src.pkg_files: + dest_src_map = {} + for dest, file in pfi.dest_src_map.items(): + file_kind, expanded_dest = _expand_path(dest, platform) + if file_kind == ctx.attr.kind: + dest_src_map[expanded_dest] = file + if dest_src_map: + pkg_files.append((PackageFilesInfo(dest_src_map = dest_src_map, attributes = pfi.attributes), 
origin)) + + files = [depset(pfi.dest_src_map.values()) for pfi, _ in pkg_files] + return [ + PackageFilegroupInfo(pkg_files = pkg_files, pkg_dirs = [], pkg_symlinks = []), + DefaultInfo(files = depset(transitive = files)), + ] + +_extract_pkg_filegroup = rule( + implementation = _extract_pkg_filegroup_impl, + doc = """ + This internal rule extracts the arch or generic part of a `PackageFilegroupInfo` source, returning a + `PackageFilegroupInfo` that is a subset of the provided `src`, while expanding `{CODEQL_PLATFORM}` in + destination paths to the relevant codeql platform (linux64, win64 or osx64). + The distinction between generic and arch contents is given on a per-file basis depending on the install path + containing {CODEQL_PLATFORM}, which will typically have been added by a `prefix` attribute to a `pkg_*` rule. + No `pkg_dirs` or `pkg_symlink` must have been used for assembling the source mapping information: we could + easily add support for that, but we don't require it for now. + """, + attrs = { + "src": attr.label(providers = [PackageFilegroupInfo, DefaultInfo]), + "kind": attr.string(doc = "What part to extract", values = ["generic", "arch"]), + } | _PLAT_DETECTION_ATTRS, +) + +_ZipInfo = provider(fields = {"zips_to_prefixes": "mapping of zip files to prefixes"}) + +def _zip_info_impl(ctx): + zips = {} + for zip_target, prefix in ctx.attr.srcs.items(): + for zip in zip_target.files.to_list(): + zips[zip] = prefix + return [ + _ZipInfo(zips_to_prefixes = zips), + ] + +_zip_info = rule( + implementation = _zip_info_impl, + doc = """ + This internal rule simply instantiates a _ZipInfo provider out of `zips`. 
+ """, + attrs = { + "srcs": attr.label_keyed_string_dict( + doc = "mapping from zip files to install prefixes", + allow_files = [".zip"], + ), + }, +) + +def _zip_info_filter_impl(ctx): + platform = _detect_platform(ctx) + filtered_zips = {} + for zip_info in ctx.attr.srcs: + for zip, prefix in zip_info[_ZipInfo].zips_to_prefixes.items(): + zip_kind, expanded_prefix = _expand_path(prefix, platform) + if zip_kind == ctx.attr.kind: + filtered_zips[zip] = expanded_prefix + return [ + _ZipInfo(zips_to_prefixes = filtered_zips), + ] + +_zip_info_filter = rule( + implementation = _zip_info_filter_impl, + doc = """ + This internal rule transforms a _ZipInfo provider so that: + * only zips matching `kind` are included + * a kind of a zip is given by its prefix: if it contains {CODEQL_PLATFORM} it is arch, otherwise it's generic + * in the former case, {CODEQL_PLATFORM} is expanded + """, + attrs = { + "srcs": attr.label_list(doc = "_ZipInfos to transform", providers = [_ZipInfo]), + "kind": attr.string(doc = "Which zip kind to consider", values = ["generic", "arch"]), + } | _PLAT_DETECTION_ATTRS, +) + +def _imported_zips_manifest_impl(ctx): + manifest = [] + files = [] + for zip_info in ctx.attr.srcs: + zip_info = zip_info[_ZipInfo] + manifest += ["%s:%s" % (p, z.short_path) for z, p in zip_info.zips_to_prefixes.items()] + files.extend(zip_info.zips_to_prefixes) + + output = ctx.actions.declare_file(ctx.label.name + ".params") + ctx.actions.write( + output, + "\n".join(manifest), + ) + return DefaultInfo( + files = depset([output]), + runfiles = ctx.runfiles(files), + ) + +_imported_zips_manifest = rule( + implementation = _imported_zips_manifest_impl, + doc = """ + This internal rule prints a zip manifest file that `misc/bazel/internal/install.py` understands. + {CODEQL_PLATFORM} can be used as zip prefixes and will be expanded to the relevant codeql platform. 
+ """, + attrs = { + "srcs": attr.label_list( + doc = "mappings from zip files to install prefixes in _ZipInfo format", + providers = [_ZipInfo], + ), + }, +) + +def _zipmerge_impl(ctx): + zips = [] + transitive_zips = [] + output = ctx.actions.declare_file(ctx.attr.out) + args = [output.path] + for zip_target in ctx.attr.srcs: + if _ZipInfo in zip_target: + zip_info = zip_target[_ZipInfo] + for zip, prefix in zip_info.zips_to_prefixes.items(): + args += [ + "--prefix=%s/%s" % (ctx.attr.prefix, prefix.rstrip("/")), + zip.path, + ] + zips.append(zip) + else: + zip_files = zip_target.files.to_list() + for zip in zip_files: + if zip.extension != "zip": + fail("%s file found while expecting a .zip file " % zip.short_path) + args.append("--prefix=%s" % ctx.attr.prefix) + args += [z.path for z in zip_files] + transitive_zips.append(zip_target.files) + ctx.actions.run( + outputs = [output], + executable = ctx.executable._zipmerge, + inputs = depset(zips, transitive = transitive_zips), + arguments = args, + ) + + return [ + DefaultInfo(files = depset([output])), + ] + +_zipmerge = rule( + implementation = _zipmerge_impl, + doc = """ + This internal rule merges a zip files together + """, + attrs = { + "srcs": attr.label_list(doc = "Zip file to include, either as straight up `.zip` files or `_ZipInfo` data"), + "out": attr.string(doc = "output file name"), + "prefix": attr.string(doc = "Prefix posix path to add to the zip contents in the archive"), + "_zipmerge": attr.label(default = "//misc/bazel/internal/zipmerge", executable = True, cfg = "exec"), + }, +) + +def _get_zip_filename(name_prefix, kind): + if kind == "arch": + return name_prefix + "-" + _detect_platform() + ".zip" # using + because there's a select + else: + return "%s-generic.zip" % name_prefix + +def codeql_pack( + *, + name, + srcs = None, + zips = None, + zip_filename = None, + visibility = None, + install_dest = "extractor-pack", + compression_level = None, + **kwargs): + """ + Define a codeql pack. 
This macro accepts `pkg_files`, `pkg_filegroup` or their `codeql_*` counterparts as `srcs`. + `zips` is a map from prefixes to `.zip` files to import. + * defines a `-generic-zip` target creating a `-generic.zip` archive with the generic bits, + prefixed with `name` + * defines a `-arch-zip` target creating a `-.zip` archive with the + arch-specific bits, prefixed with `name` + * defines a runnable `-installer` target that will install the pack in `install_dest`, relative to where the + rule is used. The install destination can be overridden appending `-- --destdir=...` to the `bazel run` + invocation. This installation _does not_ prefix the contents with `name`. + + The distinction between arch-specific and generic contents is made based on whether the paths (including possible + prefixes added by rules) contain the special `{CODEQL_PLATFORM}` placeholder, which in case it is present will also + be replaced by the appropriate platform (`linux64`, `win64` or `osx64`). + + `compression_level` can be used to tweak the compression level used when creating archives. Consider that this + does not affect the contents of `zips`, only `srcs`. 
+ """ + internal = _make_internal(name) + zip_filename = zip_filename or name + zips = zips or {} + pkg_filegroup( + name = internal("all"), + srcs = srcs, + visibility = ["//visibility:private"], + **kwargs + ) + if zips: + _zip_info( + name = internal("zip-info"), + srcs = zips, + visibility = ["//visibility:private"], + ) + for kind in ("generic", "arch"): + _extract_pkg_filegroup( + name = internal(kind), + src = internal("all"), + kind = kind, + visibility = ["//visibility:private"], + ) + if zips: + pkg_zip( + name = internal(kind, "zip-base"), + srcs = [internal(kind)], + visibility = ["//visibility:private"], + compression_level = compression_level, + ) + _zip_info_filter( + name = internal(kind, "zip-info"), + kind = kind, + srcs = [internal("zip-info")], + visibility = ["//visibility:private"], + ) + _zipmerge( + name = internal(kind, "zip"), + srcs = [internal(kind, "zip-base"), internal(kind, "zip-info")], + out = _get_zip_filename(name, kind), + prefix = name, + visibility = visibility, + ) + else: + pkg_zip( + name = internal(kind, "zip"), + srcs = [internal(kind)], + visibility = visibility, + package_dir = name, + package_file_name = _get_zip_filename(name, kind), + compression_level = compression_level, + ) + if zips: + _imported_zips_manifest( + name = internal("zip-manifest"), + srcs = [internal("generic-zip-info"), internal("arch-zip-info")], + visibility = ["//visibility:private"], + ) + + pkg_install( + name = internal("script"), + srcs = [internal("generic"), internal("arch")], + visibility = ["//visibility:private"], + ) + native.filegroup( + # used to locate current src directory + name = internal("build-file"), + srcs = ["BUILD.bazel"], + visibility = ["//visibility:private"], + ) + py_binary( + name = internal("installer"), + srcs = ["//misc/bazel/internal:install.py"], + main = "//misc/bazel/internal:install.py", + data = [ + internal("build-file"), + internal("script"), + ] + ([ + internal("zip-manifest"), + 
"//misc/bazel/internal/ripunzip", + ] if zips else []), + deps = ["@rules_python//python/runfiles"], + args = [ + "--build-file=$(rlocationpath %s)" % internal("build-file"), + "--pkg-install-script=$(rlocationpath %s)" % internal("script"), + "--destdir", + install_dest, + ] + ([ + "--ripunzip=$(rlocationpath //misc/bazel/internal/ripunzip)", + "--zip-manifest=$(rlocationpath %s)" % internal("zip-manifest"), + ] if zips else []), + visibility = visibility, + ) + native.filegroup( + name = name, + srcs = [internal("generic-zip"), internal("arch-zip")], + ) + +strip_prefix = _strip_prefix + +def _runfiles_group_impl(ctx): + files = [] + for src in ctx.attr.srcs: + rf = src[DefaultInfo].default_runfiles + if rf != None: + files.append(rf.files) + return [ + DefaultInfo( + files = depset(transitive = files), + ), + ] + +_runfiles_group = rule( + implementation = _runfiles_group_impl, + attrs = { + "srcs": attr.label_list(), + }, +) + +def codeql_pkg_runfiles(*, name, exes, **kwargs): + """ + Create a `codeql_pkg_files` with all runfiles from files in `exes`, flattened together. 
+ """ + internal = _make_internal(name) + _runfiles_group( + name = internal("runfiles"), + srcs = exes, + visibility = ["//visibility:private"], + ) + codeql_pkg_files( + name = name, + exes = [internal("runfiles")], + **kwargs + ) def _pkg_overlay_impl(ctx): destinations = {} diff --git a/misc/bazel/pkg_runfiles.bzl b/misc/bazel/pkg_runfiles.bzl deleted file mode 100644 index 3d3bd8c028d5..000000000000 --- a/misc/bazel/pkg_runfiles.bzl +++ /dev/null @@ -1,33 +0,0 @@ -load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_files") - -def _runfiles_group_impl(ctx): - files = [] - for src in ctx.attr.srcs: - rf = src[DefaultInfo].default_runfiles - if rf != None: - files.append(rf.files) - return [ - DefaultInfo( - files = depset(transitive = files), - ), - ] - -_runfiles_group = rule( - implementation = _runfiles_group_impl, - attrs = { - "srcs": attr.label_list(), - }, -) - -def pkg_runfiles(*, name, srcs, **kwargs): - internal_name = "_%s_runfiles" % name - _runfiles_group( - name = internal_name, - srcs = srcs, - ) - kwargs.setdefault("attributes", pkg_attributes(mode = "0755")) - pkg_files( - name = name, - srcs = [internal_name], - **kwargs - ) diff --git a/misc/codegen/loaders/dbschemeloader.py b/misc/codegen/loaders/dbschemeloader.py index 51e362362a77..f6fbab50499c 100644 --- a/misc/codegen/loaders/dbschemeloader.py +++ b/misc/codegen/loaders/dbschemeloader.py @@ -33,7 +33,7 @@ def _get_table(match): name=match["table"], columns=[_get_column(f) for f in _Re.field.finditer(match["tablebody"])], keyset=keyset, - dir=pathlib.PosixPath(match["tabledir"]) if match["tabledir"] else None, + dir=pathlib.PurePosixPath(match["tabledir"]) if match["tabledir"] else None, ) diff --git a/swift/BUILD.bazel b/swift/BUILD.bazel index 1ced5c9f1ca0..09f7e22ffe04 100644 --- a/swift/BUILD.bazel +++ b/swift/BUILD.bazel @@ -1,8 +1,10 @@ -load("@rules_pkg//pkg:install.bzl", "pkg_install") -load("@rules_pkg//pkg:mappings.bzl", "pkg_filegroup", "pkg_files") -load("//:defs.bzl", 
"codeql_platform") -load("//misc/bazel:pkg_runfiles.bzl", "pkg_runfiles") -load("//misc/bazel/cmake:cmake.bzl", "generate_cmake") +load("@rules_pkg//pkg:mappings.bzl", "pkg_filegroup") +load( + "//misc/bazel:pkg.bzl", + "codeql_pack", + "codeql_pkg_files", + "codeql_pkg_runfiles", +) filegroup( name = "schema", @@ -22,108 +24,82 @@ filegroup( visibility = ["//swift:__subpackages__"], ) -pkg_files( - name = "dbscheme_files", - srcs = [ - "ql/lib/swift.dbscheme.stats", - "//swift/extractor/trap:generated_dbscheme", - ], +codeql_pkg_files( + name = "autobuilder-incompatible-os", + exes = ["//swift/tools/diagnostics:autobuilder-incompatible-os"], ) -pkg_files( - name = "manifest", - srcs = ["codeql-extractor.yml"], +codeql_pkg_runfiles( + name = "autobuilder", + exes = ["//swift/swift-autobuilder"], ) pkg_filegroup( - name = "extractor-pack-generic", - srcs = [ - ":manifest", - "//swift/tools", - ] + select({ - "@platforms//os:windows": [], - "//conditions:default": [ - ":dbscheme_files", - "//swift/downgrades", + name = "tools-arch", + srcs = select({ + "@platforms//os:macos": [ + ":autobuilder", + "//swift/extractor:pkg", + ], + "@platforms//os:linux": [ + ":autobuilder-incompatible-os", + "//swift/extractor:pkg", + ], + "@platforms//os:windows": [ + ":autobuilder-incompatible-os", ], }), - visibility = ["//visibility:public"], + prefix = "{CODEQL_PLATFORM}", ) pkg_filegroup( - name = "extractor", - srcs = ["//swift/extractor:pkg"], - prefix = "tools/" + codeql_platform, -) - -pkg_runfiles( - name = "swift-autobuilder", - srcs = ["//swift/swift-autobuilder"], - prefix = "tools/" + codeql_platform, -) - -pkg_runfiles( - name = "diagnostics", - srcs = ["//swift/tools/diagnostics:autobuilder-incompatible-os"], - prefix = "tools/" + codeql_platform, -) - -pkg_filegroup( - name = "resource-dir-arch", - srcs = ["//swift/third_party/swift-llvm-support:swift-resource-dir"], - prefix = "resource-dir/" + codeql_platform, - visibility = ["//visibility:public"], + name = 
"tools", + srcs = [ + ":tools-arch", + "//swift/tools", + ], + prefix = "tools", ) -pkg_filegroup( - name = "extractor-pack-arch", - srcs = select({ - "@platforms//os:windows": [], - "//conditions:default": [ - ":extractor", - ":resource-dir-arch", - ], - }) + select({ - "@platforms//os:macos": [ - ":swift-autobuilder", - ], - "//conditions:default": [ - ":diagnostics", - ], - }), - visibility = ["//visibility:public"], +codeql_pkg_files( + name = "root-files", + srcs = [ + "codeql-extractor.yml", + "ql/lib/swift.dbscheme.stats", + "//swift/extractor/trap:generated_dbscheme", + ], ) -pkg_filegroup( - name = "extractor-pack", +codeql_pack( + name = "swift", srcs = [ - ":extractor-pack-arch", - ":extractor-pack-generic", + ":root-files", + ":tools", + "//swift/downgrades", ], visibility = ["//visibility:public"], + zips = select({ + "@platforms//os:windows": {}, + "//conditions:default": { + "//swift/third_party/resource-dir": "resource-dir/{CODEQL_PLATFORM}", + }, + }), ) -pkg_install( - name = "_create_extractor_pack", - srcs = ["//swift:extractor-pack"], +alias( + name = "create-extractor-pack", + actual = ":swift-installer", ) -py_binary( - name = "create-extractor-pack", - srcs = ["create_extractor_pack.py"], - main = "create_extractor_pack.py", - deps = [":_create_extractor_pack"], +# TODO: aliases for internal repo backward compatibility +alias( + name = "extractor-pack-generic", + actual = "swift-generic", + visibility = ["//visibility:public"], ) -# TODO this is unneeded here but still used in the internal repo. 
Remove once it's not -generate_cmake( - name = "cmake", - targets = [ - "//swift/extractor:extractor.real", - "//swift/logging/tests/assertion-diagnostics:assert-false", - ] + select({ - "@platforms//os:linux": ["//swift/tools/diagnostics:autobuilder-incompatible-os"], - "@platforms//os:macos": ["//swift/swift-autobuilder"], - }), +alias( + name = "extractor-pack-arch", + actual = "swift-arch", visibility = ["//visibility:public"], ) diff --git a/swift/actions/run-integration-tests/action.yml b/swift/actions/run-integration-tests/action.yml index fe5a20b02bd8..0efd2371b960 100644 --- a/swift/actions/run-integration-tests/action.yml +++ b/swift/actions/run-integration-tests/action.yml @@ -27,5 +27,5 @@ runs: with: name: swift-integration-tests-logs-${{ runner.os }} path: | - swift/ql/integration-tests/**/db/log + swift/ql/integration-tests/**/*db/log retention-days: 1 diff --git a/swift/extractor/BUILD.bazel b/swift/extractor/BUILD.bazel index 3acdbf014e34..8290aec41216 100644 --- a/swift/extractor/BUILD.bazel +++ b/swift/extractor/BUILD.bazel @@ -1,4 +1,4 @@ -load("//misc/bazel:pkg_runfiles.bzl", "pkg_runfiles") +load("//misc/bazel:pkg.bzl", "codeql_pkg_runfiles") load("//swift:rules.bzl", "swift_cc_binary") swift_cc_binary( @@ -29,9 +29,9 @@ sh_binary( data = [":extractor.real"], ) -pkg_runfiles( +codeql_pkg_runfiles( name = "pkg", - srcs = [":extractor"], excludes = ["extractor.sh"], # script gets copied as "extractor", no need for the original .sh file + exes = [":extractor"], visibility = ["//swift:__pkg__"], ) diff --git a/swift/third_party/BUILD.swift-toolchain-linux.bazel b/swift/third_party/BUILD.swift-toolchain-linux.bazel index be2c2e12a971..ef6d4d44be18 100644 --- a/swift/third_party/BUILD.swift-toolchain-linux.bazel +++ b/swift/third_party/BUILD.swift-toolchain-linux.bazel @@ -1,4 +1,5 @@ -load("@rules_pkg//pkg:mappings.bzl", "pkg_filegroup", "pkg_files") +load("@rules_pkg//pkg:mappings.bzl", "pkg_files") +load("@rules_pkg//pkg:pkg.bzl", "pkg_zip") 
_strip_prefix = "usr/lib/swift" @@ -42,8 +43,13 @@ _pm_interface_files = [ for dir, interface, module in _pm_interface_files ] -pkg_filegroup( - name = "resource-dir", +pkg_zip( + name = "resource-dir-linux", srcs = [":resource-dir-original"] + [":pkg-%s" % module for _, _, module in _pm_interface_files], +) + +alias( + name = "swift_toolchain_linux", + actual = ":resource-dir-linux", visibility = ["//visibility:public"], ) diff --git a/swift/third_party/BUILD.swift-toolchain-macos.bazel b/swift/third_party/BUILD.swift-toolchain-macos.bazel index 38250f1d2104..461b292ddbf3 100644 --- a/swift/third_party/BUILD.swift-toolchain-macos.bazel +++ b/swift/third_party/BUILD.swift-toolchain-macos.bazel @@ -1,12 +1,23 @@ load("@rules_pkg//pkg:mappings.bzl", "pkg_files") +load("@rules_pkg//pkg:pkg.bzl", "pkg_zip") _strip_prefix = "usr/lib/swift" pkg_files( - name = "resource-dir", + name = "resource-dir-files", srcs = glob( ["usr/lib/swift/**/*"], ), strip_prefix = _strip_prefix, +) + +pkg_zip( + name = "resource-dir-macos", + srcs = [":resource-dir-files"], +) + +alias( + name = "swift_toolchain_macos", + actual = ":resource-dir-macos", visibility = ["//visibility:public"], ) diff --git a/swift/third_party/resource-dir/BUILD.bazel b/swift/third_party/resource-dir/BUILD.bazel new file mode 100644 index 000000000000..0b4fdbf24df6 --- /dev/null +++ b/swift/third_party/resource-dir/BUILD.bazel @@ -0,0 +1,27 @@ +alias( + name = "resource-dir", + actual = select({"@platforms//os:" + os: "@swift-resource-dir-" + os for os in ("linux", "macos")}), + target_compatible_with = select({ + "@platforms//os:windows": ["@platforms//:incompatible"], + "//conditions:default": [], + }), + visibility = ["//swift:__pkg__"], +) + +[ + sh_binary( + name = "update-" + os, + srcs = ["update.sh"], + args = [ + "$(rlocationpath @swift_toolchain_%s)" % os, + "$(rlocationpath resource-dir-%s.zip)" % os, + ], + data = [ + "resource-dir-%s.zip" % os, + "@swift_toolchain_" + os, + ], + 
target_compatible_with = ["@platforms//os:" + os], + deps = ["@bazel_tools//tools/bash/runfiles"], + ) + for os in ("linux", "macos") +] diff --git a/swift/third_party/resource-dir/LICENSE.txt b/swift/third_party/resource-dir/LICENSE.txt new file mode 100644 index 000000000000..61b0c78195f2 --- /dev/null +++ b/swift/third_party/resource-dir/LICENSE.txt @@ -0,0 +1,211 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +## Runtime Library Exception to the Apache 2.0 License: ## + + + As an exception, if you use this Software to compile your source code and + portions of this Software are embedded into the binary product as a result, + you may redistribute such product without providing attribution as would + otherwise be required by Sections 4(a), 4(b) and 4(d) of the License. diff --git a/swift/third_party/resource-dir/README.md b/swift/third_party/resource-dir/README.md new file mode 100644 index 000000000000..38873b4a54ff --- /dev/null +++ b/swift/third_party/resource-dir/README.md @@ -0,0 +1,2 @@ +These LFS files are redistributed parts of the [Swift toolchains](https://www.swift.org/download/). +A [copy](./LICENSE.txt) of the [swift](https://github.com/apple/swift) license is included. 
diff --git a/swift/third_party/resource-dir/resource-dir-linux.zip b/swift/third_party/resource-dir/resource-dir-linux.zip new file mode 100644 index 000000000000..dc52894ec90a --- /dev/null +++ b/swift/third_party/resource-dir/resource-dir-linux.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bf1b93c60917b09e5de24f7a3a96e109337fedb7ee1cc0c2409d829866dbfe +size 190645227 diff --git a/swift/third_party/resource-dir/resource-dir-macos.zip b/swift/third_party/resource-dir/resource-dir-macos.zip new file mode 100644 index 000000000000..cfc595532191 --- /dev/null +++ b/swift/third_party/resource-dir/resource-dir-macos.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e29feb39124731510535f8d98be80bc68b10ff0e791c909c9ff96a9b97391fa +size 483440694 diff --git a/swift/third_party/resource-dir/update.sh b/swift/third_party/resource-dir/update.sh new file mode 100755 index 000000000000..dee216a61d67 --- /dev/null +++ b/swift/third_party/resource-dir/update.sh @@ -0,0 +1,12 @@ +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +cp "$(rlocation "$1")" "$(rlocation "$2")" diff --git a/swift/third_party/swift-llvm-support/BUILD.bazel b/swift/third_party/swift-llvm-support/BUILD.bazel index 183f9e7a7ff9..4bc1fffba949 100644 --- a/swift/third_party/swift-llvm-support/BUILD.bazel +++ b/swift/third_party/swift-llvm-support/BUILD.bazel @@ -7,11 +7,3 @@ alias( "@bazel_tools//src/conditions:darwin": "@swift_prebuilt_darwin_x86_64//:swift-llvm-support", }), ) - -alias( - name = "swift-resource-dir", - actual = select({ - "@bazel_tools//src/conditions:linux": "@swift_toolchain_linux//:resource-dir", - "@bazel_tools//src/conditions:darwin": "@swift_toolchain_macos//:resource-dir", - }), -) diff --git a/swift/tools/BUILD.bazel b/swift/tools/BUILD.bazel index e59561bf528d..777b96490685 100644 --- a/swift/tools/BUILD.bazel +++ b/swift/tools/BUILD.bazel @@ -1,4 +1,4 @@ -load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_filegroup", "pkg_files") +load("//misc/bazel:pkg.bzl", "codeql_pkg_files") sh_binary( name = "qltest", @@ -6,39 +6,16 @@ sh_binary( visibility = ["//swift/tools/test/qltest:__pkg__"], ) -sh_binary( - name = "autobuild", - srcs = ["autobuild.sh"], -) - -sh_binary( - name = "identify-environment", - srcs = ["identify-environment.sh"], -) - -pkg_files( - name = "scripts", +codeql_pkg_files( + name = "tools", srcs = [ "autobuild.cmd", - ":autobuild", - ":identify-environment", - ":qltest", + "tracing-config.lua", ], - 
attributes = pkg_attributes(mode = "0755"), - prefix = "tools", -) - -pkg_files( - name = "tracing-config", - srcs = ["tracing-config.lua"], - prefix = "tools", -) - -pkg_filegroup( - name = "tools", - srcs = [ - ":scripts", - ":tracing-config", + exes = [ + "autobuild.sh", + "identify-environment.sh", + "qltest.sh", ], visibility = ["//swift:__pkg__"], )