From 358c26fd13a902d9a4032a00e6683571be07a384 Mon Sep 17 00:00:00 2001 From: DavHau Date: Sat, 17 Feb 2024 19:36:32 +0700 Subject: [PATCH] fetchTree: shallow git fetching by default Motivation: make git fetching more efficient for most repos by default --- .../shallow-git-fetching-by-default.md | 12 +++++ src/libexpr/primops/fetchTree.cc | 27 ++++++++--- .../test-cases/fetchTree-shallow/default.nix | 45 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 doc/manual/rl-next/shallow-git-fetching-by-default.md create mode 100644 tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix diff --git a/doc/manual/rl-next/shallow-git-fetching-by-default.md b/doc/manual/rl-next/shallow-git-fetching-by-default.md new file mode 100644 index 000000000..4d044f881 --- /dev/null +++ b/doc/manual/rl-next/shallow-git-fetching-by-default.md @@ -0,0 +1,12 @@ +--- +synopsis: "`fetchTree` now fetches git repositories shallowly by default" +prs: 10028 +--- + +`builtins.fetchTree` now clones git repositories shallowly by default, which reduces network traffic and disk usage significantly in many cases. + +Previously, the default behavior was to clone the full history of a specific tag or branch (eg. `ref`) and only afterwards extract the files of one specific revision. + +From now on, the `ref` and `allRefs` arguments will be ignored, except if shallow cloning is disabled by setting `shallow = false`. + +The defaults for `builtins.fetchGit` remain unchanged. Here, shallow cloning has to be enabled manually by passing `shallow = true`. diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 1997d5513..2926e3161 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -138,6 +138,11 @@ static void fetchTree( attrs.emplace("exportIgnore", Explicit{true}); } + // fetchTree should fetch git repos with shallow = true by default + if (type == "git" && !params.isFetchGit && !attrs.contains("shallow")) { + attrs.emplace("shallow", Explicit{true}); + } + if (!params.allowNameArgument) if (auto nameIter = attrs.find("name"); nameIter != attrs.end()) state.error( @@ -321,6 +326,8 @@ static RegisterPrimOp primop_fetchTree({ - `ref` (String, optional) + By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled. + A [Git reference](https://git-scm.com/book/en/v2/Git-Internals-Git-References), such as a branch or tag name. Default: `"HEAD"` @@ -334,8 +341,9 @@ static RegisterPrimOp primop_fetchTree({ - `shallow` (Bool, optional) Make a shallow clone when fetching the Git tree. + When this is enabled, the options `ref` and `allRefs` have no effect anymore. - Default: `false` + Default: `true` - `submodules` (Bool, optional) @@ -345,8 +353,11 @@ static RegisterPrimOp primop_fetchTree({ - `allRefs` (Bool, optional) - If set to `true`, always fetch the entire repository, even if the latest commit is still in the cache. - Otherwise, only the latest commit is fetched if it is not already cached. + By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled. + + Whether to fetch all references (eg. branches and tags) of the repository. + With this argument being true, it's possible to load a `rev` from *any* `ref`. + (Without setting this option, only `rev`s from the specified `ref` are supported). Default: `false` @@ -600,6 +611,8 @@ static RegisterPrimOp primop_fetchGit({ [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References + This option has no effect once `shallow` cloning is enabled. + By default, the `ref` value is prefixed with `refs/heads/`. As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`. @@ -617,13 +630,15 @@ static RegisterPrimOp primop_fetchGit({ - `shallow` (default: `false`) Make a shallow clone when fetching the Git tree. - + When this is enabled, the options `ref` and `allRefs` have no effect anymore. - `allRefs` - Whether to fetch all references of the repository. - With this argument being true, it's possible to load a `rev` from *any* `ref` + Whether to fetch all references (eg. branches and tags) of the repository. + With this argument being true, it's possible to load a `rev` from *any* `ref`. (by default only `rev`s from the specified `ref` are supported). + This option has no effect once `shallow` cloning is enabled. + - `verifyCommit` (default: `true` if `publicKey` or `publicKeys` are provided, otherwise `false`) Whether to check `rev` for a signature matching `publicKey` or `publicKeys`. diff --git a/tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix b/tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix new file mode 100644 index 000000000..f635df1f8 --- /dev/null +++ b/tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix @@ -0,0 +1,45 @@ +{ + description = "fetchTree fetches git repos shallowly by default"; + script = '' + # purge nix git cache to make sure we start with a clean slate + client.succeed("rm -rf ~/.cache/nix") + + # add two commits to the repo: + # - one with a large file (2M) + # - another one making the file small again + client.succeed(f""" + dd if=/dev/urandom of={repo.path}/thailand bs=1M count=2 \ + && {repo.git} add thailand \ + && {repo.git} commit -m 'commit1' \ + && echo 'ThaigerSprint' > {repo.path}/thailand \ + && {repo.git} add thailand \ + && {repo.git} commit -m 'commit2' \ + && {repo.git} push origin main + """) + + # memoize the revision + commit2_rev = client.succeed(f""" + {repo.git} rev-parse HEAD + """).strip() + + # construct the fetcher call + fetchGit_expr = f""" + builtins.fetchTree {{ + type = "git"; + url = "{repo.remote}"; + rev = "{commit2_rev}"; + }} + """ + + # fetch the repo via nix + fetched1 = client.succeed(f""" + nix eval --impure --raw --expr '({fetchGit_expr}).outPath' + """) + + # check that the size of ~/.cache/nix is less than 1M + cache_size = client.succeed(""" + du -s ~/.cache/nix + """).strip().split()[0] + assert int(cache_size) < 1024, f"cache size is {cache_size}K which is larger than 1M" + ''; +}