diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 500064cee..ecc71ae47 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -115,10 +115,10 @@ git_oid hashToOID(const Hash & hash) return oid; } -Object lookupObject(git_repository * repo, const git_oid & oid) +Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY) { Object obj; - if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) { + if (git_object_lookup(Setter(obj), repo, &oid, type)) { auto err = git_error_last(); throw Error("getting Git object '%s': %s", oid, err->message); } @@ -909,6 +909,61 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK); } + void createHardlink(const CanonPath & path, const CanonPath & target) override + { + std::vector pathComponents; + for (auto & c : path) + pathComponents.emplace_back(c); + + if (!prepareDirs(pathComponents, false)) return; + + // We can't just look up the path from the start of the root, since + // some parent directories may not have finished yet, so we compute + // a relative path that helps us find the right `git_treebuilder` or object. + auto relTarget = CanonPath(path).parent()->makeRelative(target); + + auto dir = pendingDirs.rbegin(); + + // For each ../ component at the start, go up one directory. + // CanonPath::makeRelative() always puts all .. elements at the start, + // so they're all handled by this loop: + std::string_view relTargetLeft(relTarget); + while (hasPrefix(relTargetLeft, "../")) { + if (dir == pendingDirs.rend()) + throw Error("invalid hard link target '%s' for path '%s'", target, path); + ++dir; + relTargetLeft = relTargetLeft.substr(3); + } + if (dir == pendingDirs.rend()) + throw Error("invalid hard link target '%s' for path '%s'", target, path); + + // Look up the remainder of the target, starting at the + // top-most `git_treebuilder`. 
+ std::variant curDir{dir->builder.get()}; + Object tree; // needed to keep `entry` alive + const git_tree_entry * entry = nullptr; + + for (auto & c : CanonPath(relTargetLeft)) { + if (auto builder = std::get_if(&curDir)) { + assert(*builder); + if (!(entry = git_treebuilder_get(*builder, std::string(c).c_str()))) + throw Error("cannot find hard link target '%s' for path '%s'", target, path); + curDir = *git_tree_entry_id(entry); + } else if (auto oid = std::get_if(&curDir)) { + tree = lookupObject(*repo, *oid, GIT_OBJECT_TREE); + if (!(entry = git_tree_entry_byname((const git_tree *) &*tree, std::string(c).c_str()))) + throw Error("cannot find hard link target '%s' for path '%s'", target, path); + curDir = *git_tree_entry_id(entry); + } + } + + assert(entry); + + addToTree(*pathComponents.rbegin(), + *git_tree_entry_id(entry), + git_tree_entry_filemode(entry)); + } + Hash sync() override { updateBuilders({}); diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 29d799554..495916f62 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -7,7 +7,7 @@ namespace nix { namespace fetchers { struct PublicKey; } -struct GitFileSystemObjectSink : FileSystemObjectSink +struct GitFileSystemObjectSink : ExtendedFileSystemObjectSink { /** * Flush builder and return a final Git hash. diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index cf7d34d22..774c0d942 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -41,6 +41,19 @@ struct FileSystemObjectSink virtual void createSymlink(const CanonPath & path, const std::string & target) = 0; }; +/** + * An extension of `FileSystemObjectSink` that supports file types + * that are not supported by Nix's FSO model. + */ +struct ExtendedFileSystemObjectSink : virtual FileSystemObjectSink +{ + /** + * Create a hard link. The target must be the path of a previously + * encountered file relative to the root of the FSO. 
+ */ + virtual void createHardlink(const CanonPath & path, const CanonPath & target) = 0; +}; + /** * Recursively copy file system objects from the source into the sink. */ diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index f0df527b3..2e3236295 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -174,7 +174,7 @@ void unpackTarfile(const Path & tarFile, const Path & destDir) extract_archive(archive, destDir); } -time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink) +time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink) { time_t lastModified = 0; @@ -195,7 +195,12 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin lastModified = std::max(lastModified, archive_entry_mtime(entry)); - switch (archive_entry_filetype(entry)) { + if (auto target = archive_entry_hardlink(entry)) { + parseSink.createHardlink(cpath, CanonPath(target)); + continue; + } + + switch (auto type = archive_entry_filetype(entry)) { case AE_IFDIR: parseSink.createDirectory(cpath); @@ -232,7 +237,7 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin } default: - throw Error("file '%s' in tarball has unsupported file type", path); + throw Error("file '%s' in tarball has unsupported file type %d", path, type); } } diff --git a/src/libutil/tarfile.hh b/src/libutil/tarfile.hh index 705d211e4..0517177db 100644 --- a/src/libutil/tarfile.hh +++ b/src/libutil/tarfile.hh @@ -41,6 +41,6 @@ void unpackTarfile(Source & source, const Path & destDir); void unpackTarfile(const Path & tarFile, const Path & destDir); -time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink); +time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink); } diff --git a/tests/functional/tarball.sh b/tests/functional/tarball.sh index f999b7a10..ab357ac78 100755 --- a/tests/functional/tarball.sh +++ 
b/tests/functional/tarball.sh @@ -71,3 +71,15 @@ test_tarball() { test_tarball '' cat test_tarball .xz xz test_tarball .gz gzip + +# Test hard links. +# All entries in tree.tar.gz refer to the same file, and all have the same inode when unpacked by GNU tar. +# We don't preserve the hard links, because that's an optimization we think is not worth the complexity, +# so we only make sure that the contents are copied correctly. +path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)" +[[ $(cat "$path/a/b/foo") = bar ]] +[[ $(cat "$path/a/b/xyzzy") = bar ]] +[[ $(cat "$path/a/yyy") = bar ]] +[[ $(cat "$path/a/zzz") = bar ]] +[[ $(cat "$path/c/aap") = bar ]] +[[ $(cat "$path/fnord") = bar ]] diff --git a/tests/functional/tree.tar.gz b/tests/functional/tree.tar.gz new file mode 100644 index 000000000..f1f1d996d Binary files /dev/null and b/tests/functional/tree.tar.gz differ diff --git a/tests/unit/libfetchers/git-utils.cc b/tests/unit/libfetchers/git-utils.cc new file mode 100644 index 000000000..d3547ec6a --- /dev/null +++ b/tests/unit/libfetchers/git-utils.cc @@ -0,0 +1,112 @@ +#include "git-utils.hh" +#include "file-system.hh" +#include "gmock/gmock.h" +#include +#include +#include +#include +#include "fs-sink.hh" +#include "serialise.hh" + +namespace nix { + +class GitUtilsTest : public ::testing::Test +{ + // We use a single repository for all tests. + Path tmpDir; + std::unique_ptr delTmpDir; + +public: + void SetUp() override + { + tmpDir = createTempDir(); + delTmpDir = std::make_unique(tmpDir, true); + + // Create the repo with libgit2 + git_libgit2_init(); + git_repository * repo = nullptr; + auto r = git_repository_init(&repo, tmpDir.c_str(), 0); + ASSERT_EQ(r, 0); + git_repository_free(repo); + } + + void TearDown() override + { + // Destroy the AutoDelete (via std::unique_ptr::reset()), triggering removal; + // this is not AutoDelete::reset(), which would cancel the deletion. 
+ delTmpDir.reset(); + } + + ref openRepo() + { + return GitRepo::openRepo(tmpDir, true, false); + } +}; + +void writeString(CreateRegularFileSink & fileSink, std::string contents, bool executable) +{ + if (executable) + fileSink.isExecutable(); + fileSink.preallocateContents(contents.size()); + fileSink(contents); +} + +TEST_F(GitUtilsTest, sink_basic) +{ + auto repo = openRepo(); + auto sink = repo->getFileSystemObjectSink(); + + // TODO/Question: It seems a little odd that we use the tarball-like convention of requiring a top-level directory + // here. + // The sync method does not document this behavior and should probably be renamed because it's not very + // general, and I can't imagine "non-conventional" archives or any other source being handled by + // this sink. + + sink->createDirectory(CanonPath("foo-1.1")); + + sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) { + writeString(fileSink, "hello world", false); + }); + sink->createRegularFile(CanonPath("foo-1.1/bye"), [](CreateRegularFileSink & fileSink) { + writeString(fileSink, "thanks for all the fish", false); + }); + sink->createSymlink(CanonPath("foo-1.1/bye-link"), "bye"); + sink->createDirectory(CanonPath("foo-1.1/empty")); + sink->createDirectory(CanonPath("foo-1.1/links")); + sink->createHardlink(CanonPath("foo-1.1/links/foo"), CanonPath("foo-1.1/hello")); + + // sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello")); + + auto result = sink->sync(); + auto accessor = repo->getAccessor(result, false); + auto entries = accessor->readDirectory(CanonPath::root); + ASSERT_EQ(entries.size(), 5); + ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world"); + ASSERT_EQ(accessor->readFile(CanonPath("bye")), "thanks for all the fish"); + ASSERT_EQ(accessor->readLink(CanonPath("bye-link")), "bye"); + ASSERT_EQ(accessor->readDirectory(CanonPath("empty")).size(), 0); + ASSERT_EQ(accessor->readFile(CanonPath("links/foo")), "hello world"); +}; + 
+TEST_F(GitUtilsTest, sink_hardlink) +{ + auto repo = openRepo(); + auto sink = repo->getFileSystemObjectSink(); + + sink->createDirectory(CanonPath("foo-1.1")); + + sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) { + writeString(fileSink, "hello world", false); + }); + + try { + sink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello")); + FAIL() << "Expected an exception"; + } catch (const nix::Error & e) { + ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target")); + ASSERT_THAT(e.msg(), testing::HasSubstr("/hello")); + ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link")); + } +}; + +} // namespace nix diff --git a/tests/unit/libfetchers/local.mk b/tests/unit/libfetchers/local.mk index 286a59030..30aa142a5 100644 --- a/tests/unit/libfetchers/local.mk +++ b/tests/unit/libfetchers/local.mk @@ -29,7 +29,7 @@ libfetchers-tests_LIBS = \ libstore-test-support libutil-test-support \ libfetchers libstore libutil -libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) +libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) $(LIBGIT2_LIBS) ifdef HOST_WINDOWS # Increase the default reserved stack size to 65 MB so Nix doesn't run out of space diff --git a/tests/unit/libutil-support/tests/tracing-file-system-object-sink.cc b/tests/unit/libutil-support/tests/tracing-file-system-object-sink.cc new file mode 100644 index 000000000..122a09dcb --- /dev/null +++ b/tests/unit/libutil-support/tests/tracing-file-system-object-sink.cc @@ -0,0 +1,34 @@ +#include +#include "tracing-file-system-object-sink.hh" + +namespace nix::test { + +void TracingFileSystemObjectSink::createDirectory(const CanonPath & path) +{ + std::cerr << "createDirectory(" << path << ")\n"; + sink.createDirectory(path); +} + +void TracingFileSystemObjectSink::createRegularFile( + const CanonPath & path, std::function fn) +{ + std::cerr << "createRegularFile(" << path << ")\n"; + sink.createRegularFile(path, [&](CreateRegularFileSink & crf) { + // 
We could wrap this and trace about the chunks of data and such + fn(crf); + }); +} + +void TracingFileSystemObjectSink::createSymlink(const CanonPath & path, const std::string & target) +{ + std::cerr << "createSymlink(" << path << ", target: " << target << ")\n"; + sink.createSymlink(path, target); +} + +void TracingExtendedFileSystemObjectSink::createHardlink(const CanonPath & path, const CanonPath & target) +{ + std::cerr << "createHardlink(" << path << ", target: " << target << ")\n"; + sink.createHardlink(path, target); +} + +} // namespace nix::test diff --git a/tests/unit/libutil-support/tests/tracing-file-system-object-sink.hh b/tests/unit/libutil-support/tests/tracing-file-system-object-sink.hh new file mode 100644 index 000000000..895ac3664 --- /dev/null +++ b/tests/unit/libutil-support/tests/tracing-file-system-object-sink.hh @@ -0,0 +1,41 @@ +#pragma once +#include "fs-sink.hh" + +namespace nix::test { + +/** + * A `FileSystemObjectSink` that traces calls, writing to stderr. + */ +class TracingFileSystemObjectSink : public virtual FileSystemObjectSink +{ + FileSystemObjectSink & sink; +public: + TracingFileSystemObjectSink(FileSystemObjectSink & sink) + : sink(sink) + { + } + + void createDirectory(const CanonPath & path) override; + + void createRegularFile(const CanonPath & path, std::function fn) override; + + void createSymlink(const CanonPath & path, const std::string & target) override; +}; + +/** + * An `ExtendedFileSystemObjectSink` that traces calls, writing to stderr. + */ +class TracingExtendedFileSystemObjectSink : public TracingFileSystemObjectSink, public ExtendedFileSystemObjectSink +{ + ExtendedFileSystemObjectSink & sink; +public: + TracingExtendedFileSystemObjectSink(ExtendedFileSystemObjectSink & sink) + : TracingFileSystemObjectSink(sink) + , sink(sink) + { + } + + void createHardlink(const CanonPath & path, const CanonPath & target) override; +}; + +}