diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc index 35ce0ac36..0ebd750f6 100644 --- a/src/libutil/fs-sink.cc +++ b/src/libutil/fs-sink.cc @@ -5,52 +5,6 @@ namespace nix { -void copyRecursive( - SourceAccessor & accessor, const CanonPath & from, - FileSystemObjectSink & sink, const Path & to) -{ - auto stat = accessor.lstat(from); - - switch (stat.type) { - case SourceAccessor::tSymlink: - { - sink.createSymlink(to, accessor.readLink(from)); - break; - } - - case SourceAccessor::tRegular: - { - sink.createRegularFile(to, [&](CreateRegularFileSink & crf) { - if (stat.isExecutable) - crf.isExecutable(); - accessor.readFile(from, crf, [&](uint64_t size) { - crf.preallocateContents(size); - }); - }); - break; - } - - case SourceAccessor::tDirectory: - { - sink.createDirectory(to); - for (auto & [name, _] : accessor.readDirectory(from)) { - copyRecursive( - accessor, from / name, - sink, to + "/" + name); - break; - } - break; - } - - case SourceAccessor::tMisc: - throw Error("file '%1%' has an unsupported type", from); - - default: - abort(); - } -} - - struct RestoreSinkSettings : Config { Setting preallocateContents{this, false, "preallocate-contents", diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index ae577819a..670b55c2b 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -41,13 +41,6 @@ struct FileSystemObjectSink virtual void createSymlink(const Path & path, const std::string & target) = 0; }; -/** - * Recursively copy file system objects from the source into the sink. - */ -void copyRecursive( - SourceAccessor & accessor, const CanonPath & sourcePath, - FileSystemObjectSink & sink, const Path & destPath); - /** * Ignore everything and do nothing */ diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 5733531fa..029e1af44 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -5,302 +5,13 @@ #include #include // for strcasecmp -#include "signals.hh" -#include "config.hh" -#include "hash.hh" -#include "posix-source-accessor.hh" - #include "git.hh" -#include "serialise.hh" namespace nix::git { using namespace nix; using namespace std::string_literals; -std::optional decodeMode(RawMode m) { - switch (m) { - case (RawMode) Mode::Directory: - case (RawMode) Mode::Executable: - case (RawMode) Mode::Regular: - case (RawMode) Mode::Symlink: - return (Mode) m; - default: - return std::nullopt; - } -} - - -static std::string getStringUntil(Source & source, char byte) -{ - std::string s; - char n[1]; - source(std::string_view { n, 1 }); - while (*n != byte) { - s += *n; - source(std::string_view { n, 1 }); - } - return s; -} - - -static std::string getString(Source & source, int n) -{ - std::string v; - v.resize(n); - source(v); - return v; -} - -void parseBlob( - FileSystemObjectSink & sink, - const Path & sinkPath, - Source & source, - bool executable, - const ExperimentalFeatureSettings & xpSettings) -{ - xpSettings.require(Xp::GitHashing); - - sink.createRegularFile(sinkPath, [&](auto & crf) { - if (executable) - crf.isExecutable(); - - unsigned long long size = std::stoi(getStringUntil(source, 0)); - - crf.preallocateContents(size); - - unsigned long long left = size; - std::string buf; - buf.reserve(65536); - - while (left) { - checkInterrupt(); - buf.resize(std::min((unsigned long long)buf.capacity(), left)); - source(buf); - crf(buf); - left -= buf.size(); - } - }); -} - -void parseTree( - FileSystemObjectSink & sink, - const Path & sinkPath, - Source & source, - std::function hook, - const ExperimentalFeatureSettings & xpSettings) -{ - unsigned long long size = std::stoi(getStringUntil(source, 0)); - unsigned long long left = size; - - sink.createDirectory(sinkPath); - - while (left) { - std::string perms = getStringUntil(source, ' '); - left -= perms.size(); - left -= 1; - - RawMode rawMode = std::stoi(perms, 0, 8); - auto modeOpt = decodeMode(rawMode); - if (!modeOpt) - throw Error("Unknown Git permission: %o", perms); - auto mode = std::move(*modeOpt); - - std::string name = getStringUntil(source, '\0'); - left -= name.size(); - left -= 1; - - std::string hashs = getString(source, 20); - left -= 20; - - Hash hash(HashAlgorithm::SHA1); - std::copy(hashs.begin(), hashs.end(), hash.hash); - - hook(name, TreeEntry { - .mode = mode, - .hash = hash, - }); - } -} - -ObjectType parseObjectType( - Source & source, - const ExperimentalFeatureSettings & xpSettings) -{ - xpSettings.require(Xp::GitHashing); - - auto type = getString(source, 5); - - if (type == "blob ") { - return ObjectType::Blob; - } else if (type == "tree ") { - return ObjectType::Tree; - } else throw Error("input doesn't look like a Git object"); -} - -void parse( - FileSystemObjectSink & sink, - const Path & sinkPath, - Source & source, - bool executable, - std::function hook, - const ExperimentalFeatureSettings & xpSettings) -{ - xpSettings.require(Xp::GitHashing); - - auto type = parseObjectType(source, xpSettings); - - switch (type) { - case ObjectType::Blob: - parseBlob(sink, sinkPath, source, executable, xpSettings); - break; - case ObjectType::Tree: - parseTree(sink, sinkPath, source, hook, xpSettings); - break; - default: - assert(false); - }; -} - - -std::optional convertMode(SourceAccessor::Type type) -{ - switch (type) { - case SourceAccessor::tSymlink: return Mode::Symlink; - case SourceAccessor::tRegular: return Mode::Regular; - case SourceAccessor::tDirectory: return Mode::Directory; - case SourceAccessor::tMisc: return std::nullopt; - default: abort(); - } -} - - -void restore(FileSystemObjectSink & sink, Source & source, std::function hook) -{ - parse(sink, "", source, false, [&](Path name, TreeEntry entry) { - auto [accessor, from] = hook(entry.hash); - auto stat = accessor->lstat(from); - auto gotOpt = convertMode(stat.type); - if (!gotOpt) - throw Error("file '%s' (git hash %s) has an unsupported type", - from, - entry.hash.to_string(HashFormat::Base16, false)); - auto & got = *gotOpt; - if (got != entry.mode) - throw Error("git mode of file '%s' (git hash %s) is %o but expected %o", - from, - entry.hash.to_string(HashFormat::Base16, false), - (RawMode) got, - (RawMode) entry.mode); - copyRecursive( - *accessor, from, - sink, name); - }); -} - - -void dumpBlobPrefix( - uint64_t size, Sink & sink, - const ExperimentalFeatureSettings & xpSettings) -{ - xpSettings.require(Xp::GitHashing); - auto s = fmt("blob %d\0"s, std::to_string(size)); - sink(s); -} - - -void dumpTree(const Tree & entries, Sink & sink, - const ExperimentalFeatureSettings & xpSettings) -{ - xpSettings.require(Xp::GitHashing); - - std::string v1; - - for (auto & [name, entry] : entries) { - auto name2 = name; - if (entry.mode == Mode::Directory) { - assert(name2.back() == '/'); - name2.pop_back(); - } - v1 += fmt("%o %s\0"s, static_cast(entry.mode), name2); - std::copy(entry.hash.hash, entry.hash.hash + entry.hash.hashSize, std::back_inserter(v1)); - } - - { - auto s = fmt("tree %d\0"s, v1.size()); - sink(s); - } - - sink(v1); -} - - -Mode dump( - SourceAccessor & accessor, const CanonPath & path, - Sink & sink, - std::function hook, - PathFilter & filter, - const ExperimentalFeatureSettings & xpSettings) -{ - auto st = accessor.lstat(path); - - switch (st.type) { - case SourceAccessor::tRegular: - { - accessor.readFile(path, sink, [&](uint64_t size) { - dumpBlobPrefix(size, sink, xpSettings); - }); - return st.isExecutable - ? Mode::Executable - : Mode::Regular; - } - - case SourceAccessor::tDirectory: - { - Tree entries; - for (auto & [name, _] : accessor.readDirectory(path)) { - auto child = path / name; - if (!filter(child.abs())) continue; - - auto entry = hook(child); - - auto name2 = name; - if (entry.mode == Mode::Directory) - name2 += "/"; - - entries.insert_or_assign(std::move(name2), std::move(entry)); - } - dumpTree(entries, sink, xpSettings); - return Mode::Directory; - } - - case SourceAccessor::tSymlink: - case SourceAccessor::tMisc: - default: - throw Error("file '%1%' has an unsupported type", path); - } -} - - -TreeEntry dumpHash( - HashAlgorithm ha, - SourceAccessor & accessor, const CanonPath & path, PathFilter & filter) -{ - std::function hook; - hook = [&](const CanonPath & path) -> TreeEntry { - auto hashSink = HashSink(ha); - auto mode = dump(accessor, path, hashSink, hook, filter); - auto hash = hashSink.finish().first; - return { - .mode = mode, - .hash = hash, - }; - }; - - return hook(path); -} - - std::optional parseLsRemoteLine(std::string_view line) { const static std::regex line_regex("^(ref: *)?([^\\s]+)(?:\\t+(.*))?$"); diff --git a/src/libutil/git.hh b/src/libutil/git.hh index d9eb138e1..dea351929 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -5,160 +5,8 @@ #include #include -#include "types.hh" -#include "serialise.hh" -#include "hash.hh" -#include "source-accessor.hh" -#include "fs-sink.hh" - namespace nix::git { -enum struct ObjectType { - Blob, - Tree, - //Commit, - //Tag, -}; - -using RawMode = uint32_t; - -enum struct Mode : RawMode { - Directory = 0040000, - Regular = 0100644, - Executable = 0100755, - Symlink = 0120000, -}; - -std::optional decodeMode(RawMode m); - -/** - * An anonymous Git tree object entry (no name part). - */ -struct TreeEntry -{ - Mode mode; - Hash hash; - - GENERATE_CMP(TreeEntry, me->mode, me->hash); -}; - -/** - * A Git tree object, fully decoded and stored in memory. - * - * Directory names must end in a `/` for sake of sorting. See - * https://github.com/mirage/irmin/issues/352 - */ -using Tree = std::map; - -/** - * Callback for processing a child hash with `parse` - * - * The function should - * - * 1. Obtain the file system objects denoted by `gitHash` - * - * 2. Ensure they match `mode` - * - * 3. Feed them into the same sink `parse` was called with - * - * Implementations may seek to memoize resources (bandwidth, storage, - * etc.) for the same Git hash. - */ -using SinkHook = void(const Path & name, TreeEntry entry); - -/** - * Parse the "blob " or "tree " prefix. - * - * @throws if prefix not recognized - */ -ObjectType parseObjectType( - Source & source, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -void parseBlob( - FileSystemObjectSink & sink, const Path & sinkPath, - Source & source, - bool executable, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -void parseTree( - FileSystemObjectSink & sink, const Path & sinkPath, - Source & source, - std::function hook, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -/** - * Helper putting the previous three `parse*` functions together. - */ -void parse( - FileSystemObjectSink & sink, const Path & sinkPath, - Source & source, - bool executable, - std::function hook, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -/** - * Assists with writing a `SinkHook` step (2). - */ -std::optional convertMode(SourceAccessor::Type type); - -/** - * Simplified version of `SinkHook` for `restore`. - * - * Given a `Hash`, return a `SourceAccessor` and `CanonPath` pointing to - * the file system object with that path. - */ -using RestoreHook = std::pair(Hash); - -/** - * Wrapper around `parse` and `RestoreSink` - */ -void restore(FileSystemObjectSink & sink, Source & source, std::function hook); - -/** - * Dumps a single file to a sink - * - * @param xpSettings for testing purposes - */ -void dumpBlobPrefix( - uint64_t size, Sink & sink, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -/** - * Dumps a representation of a git tree to a sink - */ -void dumpTree( - const Tree & entries, Sink & sink, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -/** - * Callback for processing a child with `dump` - * - * The function should return the Git hash and mode of the file at the - * given path in the accessor passed to `dump`. - * - * Note that if the child is a directory, its child in must also be so - * processed in order to compute this information. - */ -using DumpHook = TreeEntry(const CanonPath & path); - -Mode dump( - SourceAccessor & accessor, const CanonPath & path, - Sink & sink, - std::function hook, - PathFilter & filter = defaultPathFilter, - const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); - -/** - * Recursively dumps path, hashing as we go. - * - * A smaller wrapper around `dump`. - */ -TreeEntry dumpHash( - HashAlgorithm ha, - SourceAccessor & accessor, const CanonPath & path, - PathFilter & filter = defaultPathFilter); - /** * A line from the output of `git ls-remote --symref`. * diff --git a/tests/unit/libutil/git.cc b/tests/unit/libutil/git.cc index 76ef86bcf..73bbd049e 100644 --- a/tests/unit/libutil/git.cc +++ b/tests/unit/libutil/git.cc @@ -9,211 +9,6 @@ namespace nix { using namespace git; -class GitTest : public CharacterizationTest -{ - Path unitTestData = getUnitTestData() + "/git"; - -public: - - Path goldenMaster(std::string_view testStem) const override { - return unitTestData + "/" + testStem; - } - - /** - * We set these in tests rather than the regular globals so we don't have - * to worry about race conditions if the tests run concurrently. - */ - ExperimentalFeatureSettings mockXpSettings; - -private: - - void SetUp() override - { - mockXpSettings.set("experimental-features", "git-hashing"); - } -}; - -TEST(GitMode, gitMode_directory) { - Mode m = Mode::Directory; - RawMode r = 0040000; - ASSERT_EQ(static_cast(m), r); - ASSERT_EQ(decodeMode(r), std::optional { m }); -}; - -TEST(GitMode, gitMode_executable) { - Mode m = Mode::Executable; - RawMode r = 0100755; - ASSERT_EQ(static_cast(m), r); - ASSERT_EQ(decodeMode(r), std::optional { m }); -}; - -TEST(GitMode, gitMode_regular) { - Mode m = Mode::Regular; - RawMode r = 0100644; - ASSERT_EQ(static_cast(m), r); - ASSERT_EQ(decodeMode(r), std::optional { m }); -}; - -TEST(GitMode, gitMode_symlink) { - Mode m = Mode::Symlink; - RawMode r = 0120000; - ASSERT_EQ(static_cast(m), r); - ASSERT_EQ(decodeMode(r), std::optional { m }); -}; - -TEST_F(GitTest, blob_read) { - readTest("hello-world-blob.bin", [&](const auto & encoded) { - StringSource in { encoded }; - StringSink out; - RegularFileSink out2 { out }; - ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob); - parseBlob(out2, "", in, false, mockXpSettings); - - auto expected = readFile(goldenMaster("hello-world.bin")); - - ASSERT_EQ(out.s, expected); - }); -} - -TEST_F(GitTest, blob_write) { - writeTest("hello-world-blob.bin", [&]() { - auto decoded = readFile(goldenMaster("hello-world.bin")); - StringSink s; - dumpBlobPrefix(decoded.size(), s, mockXpSettings); - s(decoded); - return s.s; - }); -} - -/** - * This data is for "shallow" tree tests. However, we use "real" hashes - * so that we can check our test data in a small shell script test test - * (`tests/unit/libutil/data/git/check-data.sh`). - */ -const static Tree tree = { - { - "Foo", - { - .mode = Mode::Regular, - // hello world with special chars from above - .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1), - }, - }, - { - "bAr", - { - .mode = Mode::Executable, - // ditto - .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1), - }, - }, - { - "baZ/", - { - .mode = Mode::Directory, - // Empty directory hash - .hash = Hash::parseAny("4b825dc642cb6eb9a060e54bf8d69288fbee4904", HashAlgorithm::SHA1), - }, - }, -}; - -TEST_F(GitTest, tree_read) { - readTest("tree.bin", [&](const auto & encoded) { - StringSource in { encoded }; - NullFileSystemObjectSink out; - Tree got; - ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Tree); - parseTree(out, "", in, [&](auto & name, auto entry) { - auto name2 = name; - if (entry.mode == Mode::Directory) - name2 += '/'; - got.insert_or_assign(name2, std::move(entry)); - }, mockXpSettings); - - ASSERT_EQ(got, tree); - }); -} - -TEST_F(GitTest, tree_write) { - writeTest("tree.bin", [&]() { - StringSink s; - dumpTree(tree, s, mockXpSettings); - return s.s; - }); -} - -TEST_F(GitTest, both_roundrip) { - using File = MemorySourceAccessor::File; - - MemorySourceAccessor files; - files.root = File::Directory { - .contents { - { - "foo", - File::Regular { - .contents = "hello\n\0\n\tworld!", - }, - }, - { - "bar", - File::Directory { - .contents = { - { - "baz", - File::Regular { - .executable = true, - .contents = "good day,\n\0\n\tworld!", - }, - }, - }, - }, - }, - }, - }; - - std::map cas; - - std::function dumpHook; - dumpHook = [&](const CanonPath & path) { - StringSink s; - HashSink hashSink { HashAlgorithm::SHA1 }; - TeeSink s2 { s, hashSink }; - auto mode = dump( - files, path, s2, dumpHook, - defaultPathFilter, mockXpSettings); - auto hash = hashSink.finish().first; - cas.insert_or_assign(hash, std::move(s.s)); - return TreeEntry { - .mode = mode, - .hash = hash, - }; - }; - - auto root = dumpHook(CanonPath::root); - - MemorySourceAccessor files2; - - MemorySink sinkFiles2 { files2 }; - - std::function mkSinkHook; - mkSinkHook = [&](auto prefix, auto & hash, auto executable) { - StringSource in { cas[hash] }; - parse( - sinkFiles2, prefix, in, executable, - [&](const Path & name, const auto & entry) { - mkSinkHook( - prefix + "/" + name, - entry.hash, - entry.mode == Mode::Executable); - }, - mockXpSettings); - }; - - mkSinkHook("", root.hash, false); - - ASSERT_EQ(files, files2); -} - TEST(GitLsRemote, parseSymrefLineWithReference) { auto line = "ref: refs/head/main HEAD"; auto res = parseLsRemoteLine(line);