From 35bdb9cee7dbb7f8733cab1b1fe327f525496773 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 11 Jun 2024 16:05:57 +0200 Subject: [PATCH] Support hard links in tarballs Fixes #10395. --- src/libfetchers/git-utils.cc | 48 +++++++++++++++++++++++++++++++++-- src/libfetchers/git-utils.hh | 2 +- src/libutil/fs-sink.hh | 13 ++++++++++ src/libutil/tarfile.cc | 11 +++++--- src/libutil/tarfile.hh | 2 +- tests/functional/tarball.sh | 9 +++++++ tests/functional/tree.tar.gz | Bin 0 -> 298 bytes 7 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 tests/functional/tree.tar.gz diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 2ea1e15ed..32b35931a 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -115,10 +115,10 @@ git_oid hashToOID(const Hash & hash) return oid; } -Object lookupObject(git_repository * repo, const git_oid & oid) +Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY) { Object obj; - if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) { + if (git_object_lookup(Setter(obj), repo, &oid, type)) { auto err = git_error_last(); throw Error("getting Git object '%s': %s", oid, err->message); } @@ -909,6 +909,50 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK); } + void createHardlink(const Path & path, const CanonPath & target) override + { + auto pathComponents = tokenizeString>(path, "/"); + if (!prepareDirs(pathComponents, false)) return; + + auto relTarget = CanonPath(path).parent()->makeRelative(target); + + auto dir = pendingDirs.rbegin(); + + // For each ../ component at the start, go up one directory. + std::string_view relTargetLeft(relTarget); + while (hasPrefix(relTargetLeft, "../")) { + if (dir == pendingDirs.rend()) + throw Error("invalid hard link target '%s'", target); + ++dir; + relTargetLeft = relTargetLeft.substr(3); + } + + // Look up the remainder of the target, starting at the + // top-most `git_treebuilder`. + std::variant curDir{dir->builder.get()}; + Object tree; // needed to keep `entry` alive + const git_tree_entry * entry = nullptr; + + for (auto & c : CanonPath(relTargetLeft)) { + if (auto builder = std::get_if(&curDir)) { + if (!(entry = git_treebuilder_get(*builder, std::string(c).c_str()))) + throw Error("cannot find hard link target '%s'", target); + curDir = *git_tree_entry_id(entry); + } else if (auto oid = std::get_if(&curDir)) { + tree = lookupObject(*repo, *oid, GIT_OBJECT_TREE); + if (!(entry = git_tree_entry_byname((const git_tree *) &*tree, std::string(c).c_str()))) + throw Error("cannot find hard link target '%s'", target); + curDir = *git_tree_entry_id(entry); + } + } + + assert(entry); + + addToTree(*pathComponents.rbegin(), + *git_tree_entry_id(entry), + git_tree_entry_filemode(entry)); + } + Hash sync() override { updateBuilders({}); diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 29d799554..495916f62 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -7,7 +7,7 @@ namespace nix { namespace fetchers { struct PublicKey; } -struct GitFileSystemObjectSink : FileSystemObjectSink +struct GitFileSystemObjectSink : ExtendedFileSystemObjectSink { /** * Flush builder and return a final Git hash. diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index ae577819a..994f19960 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -41,6 +41,19 @@ struct FileSystemObjectSink virtual void createSymlink(const Path & path, const std::string & target) = 0; }; +/** + * An extension of `FileSystemObjectSink` that supports file types + * that are not supported by Nix's FSO model. + */ +struct ExtendedFileSystemObjectSink : FileSystemObjectSink +{ + /** + * Create a hard link. The target must be the path of a previously + * encountered file relative to the root of the FSO. + */ + virtual void createHardlink(const Path & path, const CanonPath & target) = 0; +}; + /** * Recursively copy file system objects from the source into the sink. */ diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index 6bb2bd2f3..e45cfaf85 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -163,7 +163,7 @@ void unpackTarfile(const Path & tarFile, const Path & destDir) extract_archive(archive, destDir); } -time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink) +time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink) { time_t lastModified = 0; @@ -183,7 +183,12 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin lastModified = std::max(lastModified, archive_entry_mtime(entry)); - switch (archive_entry_filetype(entry)) { + if (auto target = archive_entry_hardlink(entry)) { + parseSink.createHardlink(path, CanonPath(target)); + continue; + } + + switch (auto type = archive_entry_filetype(entry)) { case AE_IFDIR: parseSink.createDirectory(path); @@ -220,7 +225,7 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin } default: - throw Error("file '%s' in tarball has unsupported file type", path); + throw Error("file '%s' in tarball has unsupported file type %d", path, type); } } diff --git a/src/libutil/tarfile.hh b/src/libutil/tarfile.hh index 705d211e4..0517177db 100644 --- a/src/libutil/tarfile.hh +++ b/src/libutil/tarfile.hh @@ -41,6 +41,6 @@ void unpackTarfile(Source & source, const Path & destDir); void unpackTarfile(const Path & tarFile, const Path & destDir); -time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink); +time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink); } diff --git a/tests/functional/tarball.sh b/tests/functional/tarball.sh index ce162ddce..5d4749eb2 100755 --- a/tests/functional/tarball.sh +++ b/tests/functional/tarball.sh @@ -59,3 +59,12 @@ test_tarball() { test_tarball '' cat test_tarball .xz xz test_tarball .gz gzip + +# Test hard links. +path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)" +[[ $(cat "$path/a/b/foo") = bar ]] +[[ $(cat "$path/a/b/xyzzy") = bar ]] +[[ $(cat "$path/a/yyy") = bar ]] +[[ $(cat "$path/a/zzz") = bar ]] +[[ $(cat "$path/c/aap") = bar ]] +[[ $(cat "$path/fnord") = bar ]] diff --git a/tests/functional/tree.tar.gz b/tests/functional/tree.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1f1d996d8494c930ede3d0b66574c8e6efb014c GIT binary patch literal 298 zcmV+_0oDE=iwFP!000001MQdHYQ!KAMtzjLLC2rb=W)00RcVTwLX)R&bY%;LZb_DL z3;oWG1caG*bVjF~(vy;fRswSwbzrLB+POM5ly=@4Vr!jOq%~Qs1{Th%@_wFT9tM@t z%W=FpFXeNOg!(cS|50`aZ1K;Ui+|$+{P&>wUzSBKMiJ~UzJKswt0k+hC6HKZ9E)eQ}53c@Cj`>+G#*Y3^#PAOQ00000000000KmO`0`*Phd;ll_0G_ItkN^Mx literal 0 HcmV?d00001