Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkgs.dockertools.buildLayeredImage: customisable layering strategy #122608

Merged
93 changes: 30 additions & 63 deletions pkgs/build-support/docker/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -919,10 +919,19 @@ rec {
, includeStorePaths ? true
, includeNixDB ? false
, passthru ? {}
,
, # Pipeline used to produce docker layers. If not set, popularity contest
# algorithm is used. If set, maxLayers is ignored as the author of the
# pipeline can use one of the available functions (like "limit_layers")
# to control the amount of layers.
# See: pkgs/build-support/flatten-references-graph/src/flatten_references_graph/pipe.py
# for available functions, and its tests for how to use them.
# WARNING!! this interface is highly experimental and subject to change.
layeringPipeline ? null
, # Enables debug logging for the layering pipeline.
debug ? false
}:
assert
(lib.assertMsg (maxLayers > 1)
(lib.assertMsg (layeringPipeline == null -> maxLayers > 1)
"the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})");
assert
(lib.assertMsg (enableFakechroot -> !stdenv.hostPlatform.isDarwin) ''
Expand Down Expand Up @@ -999,26 +1008,30 @@ rec {
'';
};

closureRoots = lib.optionals includeStorePaths /* normally true */ (
[ baseJson customisationLayer ]
);
overallClosure = writeText "closure" (lib.concatStringsSep " " closureRoots);

# These derivations are only created as implementation details of docker-tools,
# so they'll be excluded from the created images.
unnecessaryDrvs = [ baseJson overallClosure customisationLayer ];
layersJsonFile = buildPackages.dockerMakeLayers {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For people who would like to experiment with different ways of grouping paths into layers, this can now be overridden (at the pkgs level) in order to inject arbitrary code for generating layers.

probably not the most friendly interface but at least it's an improvement over status quo of streamLayeredImage being tightly coupled to referencesByPopularity approach

I'd be happy to iterate on this in order to make the injection of layering logic more user friendly.

inherit debug;
closureRoots = optionals includeStorePaths [ baseJson customisationLayer ];
excludePaths = [ baseJson customisationLayer ];
pipeline =
if layeringPipeline != null
then layeringPipeline
else import
./popularity-contest-layering-pipeline.nix
{ inherit lib jq runCommand; }
{ inherit fromImage maxLayers; }
;
};

conf = runCommand "${baseName}-conf.json"
{
inherit fromImage maxLayers created mtime uid gid uname gname;
inherit fromImage created mtime uid gid uname gname layersJsonFile;
imageName = lib.toLower name;
preferLocalBuild = true;
passthru.imageTag =
if tag != null
then tag
else
lib.head (lib.strings.splitString "-" (baseNameOf (builtins.unsafeDiscardStringContext conf.outPath)));
paths = buildPackages.referencesByPopularity overallClosure;
nativeBuildInputs = [ jq ];
} ''
${if (tag == null) then ''
Expand All @@ -1038,54 +1051,7 @@ rec {
mtime="$(date -Iseconds -d "$mtime")"
fi

paths() {
cat $paths ${lib.concatMapStringsSep " "
(path: "| (grep -v ${path} || true)")
unnecessaryDrvs}
}

# Compute the number of layers that are already used by a potential
# 'fromImage' as well as the customization layer. Ensure that there is
# still at least one layer available to store the image contents.
usedLayers=0

# subtract number of base image layers
if [[ -n "$fromImage" ]]; then
(( usedLayers += $(tar -xOf "$fromImage" manifest.json | jq '.[0].Layers | length') ))
fi

# one layer will be taken up by the customisation layer
(( usedLayers += 1 ))

if ! (( $usedLayers < $maxLayers )); then
echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
"'extraCommands', but only maxLayers=$maxLayers were" \
"allowed. At least 1 layer is required to store contents."
exit 1
fi
availableLayers=$(( maxLayers - usedLayers ))

# Create $maxLayers worth of Docker Layers, one layer per store path
# unless there are more paths than $maxLayers. In that case, create
# $maxLayers-1 for the most popular layers, and smush the remainaing
# store paths in to one final layer.
#
# The following code is fiddly w.r.t. ensuring every layer is
# created, and that no paths are missed. If you change the
# following lines, double-check that your code behaves properly
# when the number of layers equals:
# maxLayers-1, maxLayers, and maxLayers+1, 0
paths |
jq -sR '
rtrimstr("\n") | split("\n")
| (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ]
| map(select(length > 0))
' \
--argjson maxLayers "$availableLayers" > store_layers.json

# The index on $store_layers is necessary because the --slurpfile
# automatically reads the file as an array.
cat ${baseJson} | jq '
jq '
. + {
"store_dir": $store_dir,
"from_image": $from_image,
Expand All @@ -1101,16 +1067,17 @@ rec {
}
' --arg store_dir "${storeDir}" \
--argjson from_image ${if fromImage == null then "null" else "'\"${fromImage}\"'"} \
--slurpfile store_layers store_layers.json \
--slurpfile store_layers "$layersJsonFile" \
--arg customisation_layer ${customisationLayer} \
--arg repo_tag "$imageName:$imageTag" \
--arg created "$created" \
--arg mtime "$mtime" \
--arg uid "$uid" \
--arg gid "$gid" \
--arg uname "$uname" \
--arg gname "$gname" |
tee $out
--arg gname "$gname" \
${baseJson} \
| tee $out
'';

result = runCommand "stream-${baseName}"
Expand Down
50 changes: 50 additions & 0 deletions pkgs/build-support/docker/make-layers.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Produces the JSON file describing how the closure of `closureRoots` is split
# into docker layers, by running `flatten_references_graph` with the given
# pipeline.
{
  coreutils,
  flattenReferencesGraph,
  lib,
  jq,
  runCommand,
}:
{
  # Roots of the references graph handed to flatten_references_graph.
  closureRoots,
  # Handed to flatten_references_graph as `exclude_paths`.
  excludePaths ? [ ],
  # Either the pipeline itself, or a path to (or a derivation producing a
  # path to) a json file containing the pipeline.
  pipeline ? [ ],
  # When true, DEBUG=True is exported for flatten_references_graph.
  debug ? false,
}:
if closureRoots == [ ] then
  # No roots, so there is nothing to flatten: emit an empty JSON list without
  # running a build.
  builtins.toFile "docker-layers-empty" "[]"
else
  runCommand "docker-layers"
    {
      __structuredAttrs = true;
      # graph, exclude_paths and pipeline are expected by the
      # flatten_references_graph executable.
      exportReferencesGraph.graph = closureRoots;
      exclude_paths = excludePaths;
      inherit pipeline;
      nativeBuildInputs = [
        coreutils
        flattenReferencesGraph
        jq
      ];
    }
    ''
      # Use the structured-attrs file locations advertised by Nix and only
      # fall back to the historical names relative to the build directory.
      attrs_sh="''${NIX_ATTRS_SH_FILE:-.attrs.sh}"
      attrs_json="''${NIX_ATTRS_JSON_FILE:-.attrs.json}"

      . "$attrs_sh"

      flatten_references_graph_arg="$attrs_json"

      echo "pipeline: $pipeline"

      # A string-valued pipeline is a path to a json file: inline its contents
      # so that flatten_references_graph always receives the pipeline itself.
      # The probe's own output is discarded; only its exit status matters.
      if jq -e '.pipeline | type == "string"' "$attrs_json" > /dev/null; then
        jq '. + { "pipeline": $pipeline[0] }' \
          --slurpfile pipeline "$pipeline" \
          "$attrs_json" > flatten_references_graph_arg.json

        flatten_references_graph_arg=flatten_references_graph_arg.json
      fi

      ${lib.optionalString debug "export DEBUG=True"}
      flatten_references_graph "$flatten_references_graph_arg" > "''${outputs[out]}"
    ''
34 changes: 34 additions & 0 deletions pkgs/build-support/docker/popularity-contest-layering-pipeline.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generates the default layering pipeline as a JSON file: the
# "popularity_contest" step followed by "limit_layers", capped at the number
# of layers left over once the base image layers and the customisation layer
# have been accounted for.
{
  lib,
  runCommand,
  jq,
}:
{
  # Upper bound on the total number of layers of the resulting image.
  maxLayers,
  # Optional base image tarball; its layers count against maxLayers.
  fromImage ? null,
}:
runCommand "popularity-contest-layering-pipeline.json"
  {
    inherit maxLayers;
    # jq is only executed while building, so it is requested as a native build
    # input (as the other docker-tools derivations do) rather than being
    # interpolated by store path, which may not be runnable on the build
    # machine when cross-compiling.
    nativeBuildInputs = [ jq ];
  }
  ''
    # Compute the number of layers that are already used by a potential
    # 'fromImage' as well as the customization layer. Ensure that there is
    # still at least one layer available to store the image contents.

    # One layer will be taken up by the customisation layer.
    usedLayers=1

    ${lib.optionalString (fromImage != null) ''
      # Account for the layers of the base image.
      baseImageLayersCount=$(tar -xOf "${fromImage}" manifest.json | jq '.[0].Layers | length')

      (( usedLayers += baseImageLayersCount ))
    ''}

    if ! (( $usedLayers < $maxLayers )); then
      echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
        "'extraCommands', but only maxLayers=$maxLayers were" \
        "allowed. At least 1 layer is required to store contents."
      exit 1
    fi
    availableLayers=$(( maxLayers - usedLayers ))

    # Produce pipeline which uses popularity_contest algo.
    echo '[["popularity_contest"],["limit_layers",'$availableLayers']]' > $out
  ''
54 changes: 54 additions & 0 deletions pkgs/by-name/fl/flattenReferencesGraph/dev-shell.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Development shell for flattenReferencesGraph.
#
# Start this shell with:
# nix-shell path/to/root/of/nixpkgs -A flattenReferencesGraph.dev-shell
{
  mkShell,
  callPackage,
  python3Packages,
}:
let
  # format / lint / unittest wrapper scripts shared with the package build.
  helpers = callPackage ./helpers.nix { };
in
mkShell {
  # Pull in everything needed to build the package itself.
  inputsFrom = [ (callPackage ./package.nix { }) ];
  buildInputs = [
    helpers.format
    helpers.lint
    helpers.unittest
    # This is needed to plot graphs when DEBUG_PLOT is set to True.
    python3Packages.pycairo
    # This can be used on linux to display the graphs.
    # On other platforms the image viewer needs to be set with
    # DEBUG_PLOT_IMAGE_VIEWER env var.
    # pkgs.gwenview
  ];
  # Print a short usage summary when the shell starts. The text is wrapped in
  # single quotes for the shell, so it must not contain an apostrophe.
  shellHook = ''
    echo '
    **********************************************************************
    **********************************************************************

    Commands useful for development (should be executed from src dir):


    format
    * formats all files in place using autopep8

    lint
    * lints all files using flake8

    unittest
    * runs all unit tests

    following env vars can be set to enable extra output in tests:
    - DEBUG=True - enable debug logging
    - DEBUG_PLOT=True - plot graphs processed by split_paths.py and
    subcomponent.py
    - DEBUG_PLOT_IMAGE_VIEWER=$PATH_OF_IMAGE_VIEWER_APP - app used to
    display plots (default: gwenview)
    - DEBUG_PLOT_SAVE_BASE_NAME=$SOME_NAME - if set, plots will be saved
    to files instead of displayed with image viewer

    **********************************************************************
    **********************************************************************
    '
  '';
}
36 changes: 36 additions & 0 deletions pkgs/by-name/fl/flattenReferencesGraph/helpers.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Small wrapper scripts (format, lint, unittest) used both by the package's
# check phase and by the development shell.
{
  bash,
  writers,
  python3Packages,
}:
let
  # Script writer producing a bash executable at bin/<name>; the generated
  # script is passed through `bash -n` (parse only, no execution) as a check.
  # NOTE(review): `$1` is expanded by whatever shell makeScriptWriter runs the
  # check in - confirm it resolves to the script path as intended.
  writeCheckedBashBin =
    name:
    let
      interpreter = "${bash}/bin/bash";
    in
    writers.makeScriptWriter {
      inherit interpreter;
      check = "${interpreter} -n $1";
    } "/bin/${name}";

  # Helpers used during build/development.

  # Runs flake8, forwarding all arguments unchanged. The arguments are quoted
  # so that paths containing whitespace or glob characters stay intact.
  lint = writeCheckedBashBin "lint" ''
    ${python3Packages.flake8}/bin/flake8 --show-source "''${@}"
  '';

  # Runs `python -m unittest`; without arguments it discovers every
  # '*_test.py' file.
  unittest = writeCheckedBashBin "unittest" ''
    if [ "$#" -eq 0 ]; then
      set -- discover -p '*_test.py'
    fi

    ${python3Packages.python}/bin/python -m unittest "''${@}"
  '';

  # Reformats the current directory recursively and in place with autopep8;
  # extra arguments are appended to the autopep8 command line.
  format = writeCheckedBashBin "format" ''
    ${python3Packages.autopep8}/bin/autopep8 -r -i . "''${@}"
  '';
in
{
  inherit format lint unittest;
}
38 changes: 38 additions & 0 deletions pkgs/by-name/fl/flattenReferencesGraph/package.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Python application providing the `flatten_references_graph` executable that
# dockerTools runs to group store paths into layers.
{
  callPackage,
  lib,
  python3Packages,
}:
let
  inherit (lib) fileset;
  # Provides the `unittest` wrapper script used in checkPhase.
  helpers = callPackage ./helpers.nix { };
  pythonPackages = python3Packages;
in
pythonPackages.buildPythonApplication {
  version = "0.1.0";
  pname = "flatten-references-graph";

  # Only ship the files that are part of the package, so that unrelated files
  # in ./src do not end up in the source.
  src = fileset.toSource {
    root = ./src;
    fileset = fileset.unions [
      ./src/.flake8
      ./src/flatten_references_graph
      ./src/setup.py
    ];
  };

  propagatedBuildInputs = with pythonPackages; [
    igraph
    toolz
  ];

  doCheck = true;

  checkPhase = ''
    ${helpers.unittest}/bin/unittest
  '';

  passthru = {
    dev-shell = callPackage ./dev-shell.nix { };
  };

  meta = {
    description = "Groups the store paths of a references graph into layers for dockerTools";
    # The executable is named with underscores, unlike pname, so it has to be
    # spelled out for lib.getExe to resolve the right file.
    mainProgram = "flatten_references_graph";
  };
}
4 changes: 4 additions & 0 deletions pkgs/by-name/fl/flattenReferencesGraph/src/.flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 80
[pep8]
aggressive = 1
1 change: 1 addition & 0 deletions pkgs/by-name/fl/flattenReferencesGraph/src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
Loading