From 04836c73e5589ec10bef08992a7ef815a7f7592c Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 21 Jan 2024 14:01:57 -0500 Subject: [PATCH 1/3] Merge `nativeCheckInputs` into `nativeBuildInputs` They were getting skipped for the test-against checks. --- package.nix | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/package.nix b/package.nix index d1d14d10e..1f895e301 100644 --- a/package.nix +++ b/package.nix @@ -209,6 +209,10 @@ in { (lib.getBin lowdown) mdbook mdbook-linkcheck + ] ++ lib.optionals doInstallCheck [ + git + mercurial + openssh ] ++ lib.optionals (doInstallCheck || enableManual) [ jq # Also for custom mdBook preprocessor. ] ++ lib.optional stdenv.hostPlatform.isLinux util-linux @@ -249,12 +253,6 @@ in { dontBuild = !attrs.doBuild; doCheck = attrs.doCheck; - nativeCheckInputs = [ - git - mercurial - openssh - ]; - disallowedReferences = [ boost ]; preConfigure = lib.optionalString (doBuild && ! stdenv.hostPlatform.isStatic) ( From 201551c937c3f816a23c4c2f36edba60619e42f9 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 4 Sep 2023 09:51:23 -0400 Subject: [PATCH 2/3] Add Git object hashing to the store layer Part of RFC 133 Extracted from our old IPFS branches. Co-Authored-By: Matthew Bauer Co-Authored-By: Carlo Nucera Co-authored-by: Robert Hensing Co-authored-by: Florian Klink --- Makefile | 1 + doc/manual/src/protocols/store-path.md | 9 +- perl/lib/Nix/Store.xs | 2 +- src/libexpr/primops.cc | 5 +- src/libstore/binary-cache-store.cc | 7 +- src/libstore/binary-cache-store.hh | 2 +- src/libstore/build/local-derivation-goal.cc | 34 ++++-- src/libstore/build/worker.cc | 4 +- src/libstore/content-address.cc | 11 ++ src/libstore/daemon.cc | 7 +- src/libstore/local-fs-store.hh | 2 +- src/libstore/local-store.cc | 85 ++++++++++++--- src/libstore/optimise-store.cc | 4 +- src/libstore/remote-store.cc | 1 + src/libstore/remote-store.hh | 2 +- src/libstore/store-api.cc | 45 +++++++- src/libstore/uds-remote-store.hh | 2 +- src/libutil/file-content-address.cc | 81 +++++++++++--- src/libutil/file-content-address.hh | 110 ++++++++++++++++---- src/nix-store/nix-store.cc | 2 +- src/nix/add-to-store.cc | 1 + src/nix/hash.cc | 47 +++++++-- tests/functional/git-hashing/common.sh | 11 ++ tests/functional/git-hashing/local.mk | 7 ++ tests/functional/git-hashing/simple.sh | 58 +++++++++++ tests/unit/libstore/content-address.cc | 2 + tests/unit/libutil/file-content-address.cc | 28 +++++ 27 files changed, 484 insertions(+), 86 deletions(-) create mode 100644 tests/functional/git-hashing/common.sh create mode 100644 tests/functional/git-hashing/local.mk create mode 100644 tests/functional/git-hashing/simple.sh diff --git a/Makefile b/Makefile index f8689c8cf..745e60aa5 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes) makefiles += \ tests/functional/local.mk \ tests/functional/ca/local.mk \ + tests/functional/git-hashing/local.mk \ tests/functional/dyn-drv/local.mk \ tests/functional/test-libstoreconsumer/local.mk \ tests/functional/plugins/local.mk diff --git a/doc/manual/src/protocols/store-path.md b/doc/manual/src/protocols/store-path.md index fcf8038fc..565c4fa75 100644 --- a/doc/manual/src/protocols/store-path.md +++ b/doc/manual/src/protocols/store-path.md @@ -89,15 +89,20 @@ where - `rec` = one of: + - ```ebnf + | "" + ``` + (empty string) for hashes of the flat (single file) serialization + - ```ebnf | "r:" ``` hashes of the for [Nix Archive (NAR)] (arbitrary file system object) serialization - ```ebnf - | "" + | 
"git:" ``` - (empty string) for hashes of the flat (single file) serialization + hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkel tree](https://en.wikipedia.org/wiki/Merkle_tree) format - ```ebnf algo = "md5" | "sha1" | "sha256" diff --git a/perl/lib/Nix/Store.xs b/perl/lib/Nix/Store.xs index 4a928594b..1c64cc66b 100644 --- a/perl/lib/Nix/Store.xs +++ b/perl/lib/Nix/Store.xs @@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path) auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); Hash h = hashPath( accessor, canonPath, - FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; + FileIngestionMethod::Recursive, parseHashAlgo(algo)); auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); } catch (Error & e) { diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 850cc7a45..9ea266cf9 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v) auto handleHashMode = [&](const std::string_view s) { if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive; else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat; - else if (s == "text") { + else if (s == "git") { + experimentalFeatureSettings.require(Xp::GitHashing); + ingestionMethod = FileIngestionMethod::Git; + } else if (s == "text") { experimentalFeatureSettings.require(Xp::DynamicDerivations); ingestionMethod = TextIngestionMethod {}; } else diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 189d1d305..d6047dd7e 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -324,6 +324,7 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = dump2.s; break; case FileIngestionMethod::Flat: + { // The dump is Flat, so we need to convert it to NAR with a // single file. StringSink s; @@ -331,6 +332,10 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = std::move(s.s); break; } + case FileIngestionMethod::Git: + unsupported("addToStoreFromDump"); + break; + } } else { // Otherwise, we have to do th same hashing as NAR so our single // hash will suffice for both purposes. @@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore( non-recursive+sha256 so we can just use the default implementation of this method in terms of addToStoreFromDump. 
*/ - auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; + auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter); auto source = sinkToSource([&](Sink & sink) { accessor.dumpPath(path, sink, filter); diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 00ab73905..76de2d11a 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -147,7 +147,7 @@ public: void narFromPath(const StorePath & path, Sink & sink) override; - ref getFSAccessor(bool requireValidPath) override; + ref getFSAccessor(bool requireValidPath = true) override; void addSignatures(const StorePath & storePath, const StringSet & sigs) override; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index b373c74b2..d92966a74 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -8,6 +8,7 @@ #include "finally.hh" #include "util.hh" #include "archive.hh" +#include "git.hh" #include "compression.hh" #include "daemon.hh" #include "topo-sort.hh" @@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() rewriteOutput(outputRewrites); /* FIXME optimize and deduplicate with addToStore */ std::string oldHashPart { scratchPath->hashPart() }; - auto got = ({ - HashModuloSink caSink { outputHash.hashAlgo, oldHashPart }; + auto got = [&]{ PosixSourceAccessor accessor; - dumpPath( - accessor, CanonPath { actualPath }, - caSink, - outputHash.method.getFileIngestionMethod()); - caSink.finish().first; - }); + auto fim = outputHash.method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + HashModuloSink caSink { outputHash.hashAlgo, oldHashPart }; + auto fim = outputHash.method.getFileIngestionMethod(); + dumpPath( + accessor, CanonPath { actualPath }, + caSink, + (FileSerialisationMethod) fim); + return caSink.finish().first; + } + case FileIngestionMethod::Git: { + return git::dumpHash( + outputHash.hashAlgo, accessor, + CanonPath { tmpDir + "/tmp" }).hash; + } + } + }(); ValidPathInfo newInfo0 { worker.store, @@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() PosixSourceAccessor accessor; HashResult narHashAndSize = hashPath( accessor, CanonPath { actualPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); newInfo0.narHash = narHashAndSize.first; newInfo0.narSize = narHashAndSize.second; } @@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() PosixSourceAccessor accessor; HashResult narHashAndSize = hashPath( accessor, CanonPath { actualPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first }; newInfo0.narSize = narHashAndSize.second; auto refs = rewriteRefs(); diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc index 3a34f4006..815ded3d5 100644 --- a/src/libstore/build/worker.cc +++ b/src/libstore/build/worker.cc @@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path) if (!pathExists(store.printStorePath(path))) res = false; else { - HashResult current = hashPath( + Hash current = hashPath( *store.getFSAccessor(), CanonPath { store.printStorePath(path) }, FileIngestionMethod::Recursive, info->narHash.algo); 
Hash nullHash(HashAlgorithm::SHA256); - res = info->narHash == nullHash || info->narHash == current.first; + res = info->narHash == nullHash || info->narHash == current; } pathContentsGoodCache.insert_or_assign(path, res); if (!res) diff --git a/src/libstore/content-address.cc b/src/libstore/content-address.cc index 4e3d2f64d..4ed4f2de5 100644 --- a/src/libstore/content-address.cc +++ b/src/libstore/content-address.cc @@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m) return ""; case FileIngestionMethod::Recursive: return "r:"; + case FileIngestionMethod::Git: + experimentalFeatureSettings.require(Xp::GitHashing); + return "git:"; default: throw Error("impossible, caught both cases"); } @@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m) if (splitPrefix(m, "r:")) { return FileIngestionMethod::Recursive; } + else if (splitPrefix(m, "git:")) { + experimentalFeatureSettings.require(Xp::GitHashing); + return FileIngestionMethod::Git; + } else if (splitPrefix(m, "text:")) { return TextIngestionMethod {}; } @@ -131,6 +138,10 @@ static std::pair parseContentAddressMethodP auto method = FileIngestionMethod::Flat; if (splitPrefix(rest, "r:")) method = FileIngestionMethod::Recursive; + else if (splitPrefix(rest, "git:")) { + experimentalFeatureSettings.require(Xp::GitHashing); + method = FileIngestionMethod::Git; + } HashAlgorithm hashAlgo = parseHashAlgorithm_(); return { std::move(method), diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index cf5020dfe..873065e14 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -13,6 +13,7 @@ #include "archive.hh" #include "derivations.hh" #include "args.hh" +#include "git.hh" namespace nix::daemon { @@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref store, TeeSource savedNARSource(from, saved); NullFileSystemObjectSink sink; /* just parse the NAR */ parseDump(sink, savedNARSource); - } else { + } else if (method == FileIngestionMethod::Flat) { /* Incrementally parse the NAR file, stripping the metadata, and streaming the sole file we expect into `saved`. */ RegularFileSink savedRegular { saved }; parseDump(savedRegular, from); if (!savedRegular.regular) throw Error("regular file expected"); + } else { + /* Should have validated above that no other file ingestion + method was used. 
*/ + assert(false); } }); logger->startWork(); diff --git a/src/libstore/local-fs-store.hh b/src/libstore/local-fs-store.hh index bf855b67e..8fb081200 100644 --- a/src/libstore/local-fs-store.hh +++ b/src/libstore/local-fs-store.hh @@ -43,7 +43,7 @@ public: LocalFSStore(const Params & params); void narFromPath(const StorePath & path, Sink & sink) override; - ref getFSAccessor(bool requireValidPath) override; + ref getFSAccessor(bool requireValidPath = true) override; /** * Creates symlink from the `gcRoot` to the `storePath` and diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 2c22bfe31..5f35cf3a8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1,5 +1,6 @@ #include "local-store.hh" #include "globals.hh" +#include "git.hh" #include "archive.hh" #include "pathlocks.hh" #include "worker-protocol.hh" @@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, if (info.ca) { auto & specified = *info.ca; auto actualHash = ({ - HashModuloSink caSink { - specified.hash.algo, - std::string { info.path.hashPart() }, - }; - PosixSourceAccessor accessor; - dumpPath( - *getFSAccessor(false), - CanonPath { printStorePath(info.path) }, - caSink, - specified.method.getFileIngestionMethod()); + auto accessor = getFSAccessor(false); + CanonPath path { printStorePath(info.path) }; + Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++ + auto fim = specified.method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + HashModuloSink caSink { + specified.hash.algo, + std::string { info.path.hashPart() }, + }; + dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim); + h = caSink.finish().first; + break; + } + case FileIngestionMethod::Git: + h = git::dumpHash(specified.hash.algo, *accessor, path).hash; + break; + } ContentAddress { .method = specified.method, - .hash = caSink.finish().first, + .hash = std::move(h), }; }); if (specified.hash != actualHash.hash) { @@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump( delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - restorePath(tempPath, bothSource, method.getFileIngestionMethod()); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + restorePath(tempPath, bothSource, (FileSerialisationMethod) fim); + break; + case FileIngestionMethod::Git: { + RestoreSink sink; + sink.dstPath = tempPath; + auto accessor = getFSAccessor(); + git::restore(sink, bothSource, [&](Hash childHash) { + return std::pair { + &*accessor, + CanonPath { + printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })) + }, + }; + }); + break; + } + } dumpBuffer.reset(); dump = {}; @@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump( if (inMemory) { StringSource dumpSource { dump }; /* Restore from the buffer in memory. 
*/ - restorePath(realPath, dumpSource, method.getFileIngestionMethod()); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + restorePath(realPath, dumpSource, (FileSerialisationMethod) fim); + break; + case FileIngestionMethod::Git: { + RestoreSink sink; + sink.dstPath = realPath; + auto accessor = getFSAccessor(); + git::restore(sink, dumpSource, [&](Hash childHash) { + return std::pair { + &*accessor, + CanonPath { + printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })) + }, + }; + }); + break; + } + } } else { /* Move the temporary path we restored above. */ moveFile(tempPath, realPath); @@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) PosixSourceAccessor accessor; std::string hash = hashPath( accessor, CanonPath { linkPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); + FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false); if (hash != link.name) { printError("link '%s' was modified! expected hash '%s', got '%s'", linkPath, link.name, hash); diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc index 78e4f6d86..daaaaf073 100644 --- a/src/libstore/optimise-store.cc +++ b/src/libstore/optimise-store.cc @@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats, PosixSourceAccessor accessor; hashPath( accessor, CanonPath { path }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first; }); debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true)); @@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats, PosixSourceAccessor accessor; hashPath( accessor, CanonPath { linkPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first; }))) { // XXX: Consider overwriting linkPath with our valid version. diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index fadef45ff..0cae84828 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -13,6 +13,7 @@ #include "derivations.hh" #include "pool.hh" #include "finally.hh" +#include "git.hh" #include "logging.hh" #include "callback.hh" #include "filetransfer.hh" diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index 87704985b..c51a21375 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -184,7 +184,7 @@ protected: friend struct ConnectionHandle; - virtual ref getFSAccessor(bool requireValidPath) override; + virtual ref getFSAccessor(bool requireValidPath = true) override; virtual void narFromPath(const StorePath & path, Sink & sink) override; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 4238cbbf5..c44612ec5 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -12,7 +12,9 @@ #include "references.hh" #include "archive.hh" #include "callback.hh" +#include "git.hh" #include "remote-store.hh" +#include "posix-source-accessor.hh" // FIXME this should not be here, see TODO below on // `addMultipleToStore`. 
#include "worker-protocol.hh" @@ -119,6 +121,9 @@ static std::string makeType( StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const { + if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1) + throw Error("Git file ingestion must use SHA-1 hash"); + if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) { return makeStorePath(makeType(*this, "source", info.references), info.hash, name); } else { @@ -166,7 +171,7 @@ std::pair StoreDirConfig::computeStorePath( const StorePathSet & references, PathFilter & filter) const { - auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; + auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter); return { makeFixedOutputPathFromCA( name, @@ -193,7 +198,37 @@ StorePath Store::addToStore( RepairFlag repair) { auto source = sinkToSource([&](Sink & sink) { - dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter); + break; + } + case FileIngestionMethod::Git: + { + git::dump( + accessor, path, + sink, + // recursively add to store if path is a directory + [&](const CanonPath & path) -> git::TreeEntry { + auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair); + auto info = queryPathInfo(storePath); + assert(info->ca); + assert(info->ca->method == FileIngestionMethod::Git); + auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath))); + auto gitModeOpt = git::convertMode(stat.type); + assert(gitModeOpt); + return { + .mode = *gitModeOpt, + .hash = info->ca->hash, + }; + }, + filter); + break; + } + } }); return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); } @@ -355,9 +390,7 @@ ValidPathInfo Store::addToStoreSlow( NullFileSystemObjectSink blank; auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat ? (FileSystemObjectSink &) fileSink - : method.getFileIngestionMethod() == FileIngestionMethod::Recursive - ? (FileSystemObjectSink &) blank - : (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases + : (FileSystemObjectSink &) blank; // for recursive or git we do recursive /* The information that flows from tapped (besides being replicated in narSink), is now put in parseSink. */ @@ -369,6 +402,8 @@ ValidPathInfo Store::addToStoreSlow( auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256 ? narHash + : method == FileIngestionMethod::Git + ? 
git::dumpHash(hashAlgo, accessor, srcPath).hash : caHashSink.finish().first; if (expectedCAHash && expectedCAHash != hash) diff --git a/src/libstore/uds-remote-store.hh b/src/libstore/uds-remote-store.hh index a5ac9080a..8bce8994a 100644 --- a/src/libstore/uds-remote-store.hh +++ b/src/libstore/uds-remote-store.hh @@ -35,7 +35,7 @@ public: static std::set uriSchemes() { return {"unix"}; } - ref getFSAccessor(bool requireValidPath) override + ref getFSAccessor(bool requireValidPath = true) override { return LocalFSStore::getFSAccessor(requireValidPath); } void narFromPath(const StorePath & path, Sink & sink) override diff --git a/src/libutil/file-content-address.cc b/src/libutil/file-content-address.cc index 2339024a2..471bda6a0 100644 --- a/src/libutil/file-content-address.cc +++ b/src/libutil/file-content-address.cc @@ -1,16 +1,53 @@ #include "file-content-address.hh" #include "archive.hh" +#include "git.hh" namespace nix { -FileIngestionMethod parseFileIngestionMethod(std::string_view input) +static std::optional parseFileSerialisationMethodOpt(std::string_view input) { if (input == "flat") { - return FileIngestionMethod::Flat; + return FileSerialisationMethod::Flat; } else if (input == "nar") { - return FileIngestionMethod::Recursive; + return FileSerialisationMethod::Recursive; } else { - throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`"); + return std::nullopt; + } +} + +FileSerialisationMethod parseFileSerialisationMethod(std::string_view input) +{ + auto ret = parseFileSerialisationMethodOpt(input); + if (ret) + return *ret; + else + throw UsageError("Unknown file serialiation method '%s', expect `flat` or `nar`"); +} + + +FileIngestionMethod parseFileIngestionMethod(std::string_view input) +{ + if (input == "git") { + return FileIngestionMethod::Git; + } else { + auto ret = parseFileSerialisationMethodOpt(input); + if (ret) + return static_cast(*ret); + else + throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`"); + } +} + + +std::string_view renderFileSerialisationMethod(FileSerialisationMethod method) +{ + switch (method) { + case FileSerialisationMethod::Flat: + return "flat"; + case FileSerialisationMethod::Recursive: + return "nar"; + default: + assert(false); } } @@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method) { switch (method) { case FileIngestionMethod::Flat: - return "flat"; case FileIngestionMethod::Recursive: - return "nar"; + return renderFileSerialisationMethod( + static_cast(method)); + case FileIngestionMethod::Git: + return "git"; default: abort(); } @@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method) void dumpPath( SourceAccessor & accessor, const CanonPath & path, Sink & sink, - FileIngestionMethod method, + FileSerialisationMethod method, PathFilter & filter) { switch (method) { - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: accessor.readFile(path, sink); break; - case FileIngestionMethod::Recursive: + case FileSerialisationMethod::Recursive: accessor.dumpPath(path, sink, filter); break; } @@ -48,13 +87,13 @@ void dumpPath( void restorePath( const Path & path, Source & source, - FileIngestionMethod method) + FileSerialisationMethod method) { switch (method) { - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: writeFile(path, source); break; - case FileIngestionMethod::Recursive: + case FileSerialisationMethod::Recursive: restorePath(path, source); break; } @@ -63,7 +102,7 @@ 
void restorePath( HashResult hashPath( SourceAccessor & accessor, const CanonPath & path, - FileIngestionMethod method, HashAlgorithm ha, + FileSerialisationMethod method, HashAlgorithm ha, PathFilter & filter) { HashSink sink { ha }; @@ -71,4 +110,20 @@ HashResult hashPath( return sink.finish(); } + +Hash hashPath( + SourceAccessor & accessor, const CanonPath & path, + FileIngestionMethod method, HashAlgorithm ht, + PathFilter & filter) +{ + switch (method) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + return hashPath(accessor, path, (FileSerialisationMethod) method, ht, filter).first; + case FileIngestionMethod::Git: + return git::dumpHash(ht, accessor, path, filter).hash; + } + +} + } diff --git a/src/libutil/file-content-address.hh b/src/libutil/file-content-address.hh index 9a7dae8c6..b361ab243 100644 --- a/src/libutil/file-content-address.hh +++ b/src/libutil/file-content-address.hh @@ -8,37 +8,38 @@ namespace nix { /** - * An enumeration of the main ways we can serialize file system + * An enumeration of the ways we can serialize file system * objects. */ -enum struct FileIngestionMethod : uint8_t { +enum struct FileSerialisationMethod : uint8_t { /** - * Flat-file hashing. Directly ingest the contents of a single file + * Flat-file. The contents of a single file exactly. */ - Flat = 0, + Flat, + /** - * Recursive (or NAR) hashing. Serializes the file-system object in - * Nix Archive format and ingest that. + * Nix Archive. Serializes the file-system object in + * Nix Archive format. */ - Recursive = 1, + Recursive, }; /** - * Parse a `FileIngestionMethod` by name. Choice of: + * Parse a `FileSerialisationMethod` by name. Choice of: * - * - `flat`: `FileIngestionMethod::Flat` - * - `nar`: `FileIngestionMethod::Recursive` + * - `flat`: `FileSerialisationMethod::Flat` + * - `nar`: `FileSerialisationMethod::Recursive` * - * Oppostite of `renderFileIngestionMethod`. + * Opposite of `renderFileSerialisationMethod`. */ -FileIngestionMethod parseFileIngestionMethod(std::string_view input); +FileSerialisationMethod parseFileSerialisationMethod(std::string_view input); /** - * Render a `FileIngestionMethod` by name. + * Render a `FileSerialisationMethod` by name. * - * Oppostite of `parseFileIngestionMethod`. + * Opposite of `parseFileSerialisationMethod`. */ -std::string_view renderFileIngestionMethod(FileIngestionMethod method); +std::string_view renderFileSerialisationMethod(FileSerialisationMethod method); /** * Dump a serialization of the given file system object. @@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method); void dumpPath( SourceAccessor & accessor, const CanonPath & path, Sink & sink, - FileIngestionMethod method, + FileSerialisationMethod method, PathFilter & filter = defaultPathFilter); /** - * Restore a serialization of the given file system object. + * Restore a serialisation of the given file system object. * * @TODO use an arbitrary `FileSystemObjectSink`. */ void restorePath( const Path & path, Source & source, - FileIngestionMethod method); + FileSerialisationMethod method); + /** * Compute the hash of the given file system object according to the * given method. * - * The hash is defined as (essentially) hashString(ha, dumpPath(path)). 
+ * The hash is defined as (in pseudocode):
+ *
+ * ```
+ * hashString(ha, dumpPath(...))
+ * ```
  */
 HashResult hashPath(
+    SourceAccessor & accessor, const CanonPath & path,
+    FileSerialisationMethod method, HashAlgorithm ha,
+    PathFilter & filter = defaultPathFilter);
+
+/**
+ * An enumeration of the ways we can ingest file system
+ * objects, producing a hash or digest.
+ */
+enum struct FileIngestionMethod : uint8_t {
+    /**
+     * Hash `FileSerialisationMethod::Flat` serialisation.
+     */
+    Flat,
+
+    /**
+     * Hash `FileSerialisationMethod::Recursive` serialisation.
+     */
+    Recursive,
+
+    /**
+     * Git hashing. In particular, files are hashed as git "blobs", and
+     * directories are hashed as git "trees".
+     *
+     * Unlike `Flat` and `Recursive`, this is not a hash of a single
+     * serialisation but a [Merkle
+     * DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple
+     * rounds of serialisation and hashing.
+     *
+     * @note Git's data model is slightly different, in that a plain
+     * file doesn't have an executable bit; directory entries do
+     * instead. We decide to treat a bare file as non-executable by
+     * fiat, as we do with `FileIngestionMethod::Flat`, which also
+     * lacks this information. Thus, Git can encode some but not all
+     * of Nix's "File System Objects", and this sort of hashing is
+     * likewise partial.
+     */
+    Git,
+};
+
+/**
+ * Parse a `FileIngestionMethod` by name. Choice of:
+ *
+ * - `flat`: `FileIngestionMethod::Flat`
+ * - `nar`: `FileIngestionMethod::Recursive`
+ * - `git`: `FileIngestionMethod::Git`
+ *
+ * Opposite of `renderFileIngestionMethod`.
+ */
+FileIngestionMethod parseFileIngestionMethod(std::string_view input);
+
+/**
+ * Render a `FileIngestionMethod` by name.
+ *
+ * Opposite of `parseFileIngestionMethod`.
+ */
+std::string_view renderFileIngestionMethod(FileIngestionMethod method);
+
+/**
+ * Compute the hash of the given file system object according to the
+ * given method.
+ *
+ * Unlike the other `hashPath`, this works on an arbitrary
+ * `FileIngestionMethod` instead of `FileSerialisationMethod`, but
+ * doesn't return the size, as that is not both simply and usefully
+ * defined for a Merkle format.
+ */ +Hash hashPath( SourceAccessor & accessor, const CanonPath & path, FileIngestionMethod method, HashAlgorithm ha, PathFilter & filter = defaultPathFilter); diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 99dbfe6e3..7c8905da6 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -555,7 +555,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise) HashResult hash = hashPath( *store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); info->narHash = hash.first; info->narSize = hash.second; } diff --git a/src/nix/add-to-store.cc b/src/nix/add-to-store.cc index ca2daecab..02154715f 100644 --- a/src/nix/add-to-store.cc +++ b/src/nix/add-to-store.cc @@ -2,6 +2,7 @@ #include "common-args.hh" #include "store-api.hh" #include "archive.hh" +#include "git.hh" #include "posix-source-accessor.hh" #include "misc-store-flags.hh" diff --git a/src/nix/hash.cc b/src/nix/hash.cc index 98d227f0e..f849bf0cf 100644 --- a/src/nix/hash.cc +++ b/src/nix/hash.cc @@ -5,6 +5,7 @@ #include "shared.hh" #include "references.hh" #include "archive.hh" +#include "git.hh" #include "posix-source-accessor.hh" #include "misc-store-flags.hh" @@ -66,9 +67,11 @@ struct CmdHashBase : Command { switch (mode) { case FileIngestionMethod::Flat: - return "print cryptographic hash of a regular file"; + return "print cryptographic hash of a regular file"; case FileIngestionMethod::Recursive: return "print cryptographic hash of the NAR serialisation of a path"; + case FileIngestionMethod::Git: + return "print cryptographic hash of the Git serialisation of a path"; default: assert(false); }; @@ -77,17 +80,41 @@ struct CmdHashBase : Command void run() override { for (auto path : paths) { + auto makeSink = [&]() -> std::unique_ptr { + if (modulus) + return std::make_unique(hashAlgo, *modulus); + else + return std::make_unique(hashAlgo); + }; - std::unique_ptr hashSink; - if (modulus) - hashSink = std::make_unique(hashAlgo, *modulus); - else - hashSink = std::make_unique(hashAlgo); + auto [accessor_, canonPath] = PosixSourceAccessor::createAtRoot(path); + auto & accessor = accessor_; + Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++ + switch (mode) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + auto hashSink = makeSink(); + dumpPath(accessor, canonPath, *hashSink, (FileSerialisationMethod) mode); + h = hashSink->finish().first; + break; + } + case FileIngestionMethod::Git: { + std::function hook; + hook = [&](const CanonPath & path) -> git::TreeEntry { + auto hashSink = makeSink(); + auto mode = dump(accessor, path, *hashSink, hook); + auto hash = hashSink->finish().first; + return { + .mode = mode, + .hash = hash, + }; + }; + h = hook(canonPath).hash; + break; + } + } - auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); - dumpPath(accessor, canonPath, *hashSink, mode); - - Hash h = hashSink->finish().first; if (truncate && h.hashSize > 20) h = compressHash(h, 20); logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI)); } diff --git a/tests/functional/git-hashing/common.sh b/tests/functional/git-hashing/common.sh new file mode 100644 index 000000000..5de96e74f --- /dev/null +++ b/tests/functional/git-hashing/common.sh @@ -0,0 +1,11 @@ +source ../common.sh + +clearStore +clearCache + +# Need backend to support git-hashing too 
+requireDaemonNewerThan "2.18.0pre20230908" + +enableFeatures "git-hashing" + +restartDaemon diff --git a/tests/functional/git-hashing/local.mk b/tests/functional/git-hashing/local.mk new file mode 100644 index 000000000..ebec01940 --- /dev/null +++ b/tests/functional/git-hashing/local.mk @@ -0,0 +1,7 @@ +git-hashing-tests := \ + $(d)/simple.sh + +install-tests-groups += git-hashing + +clean-files += \ + $(d)/config.nix diff --git a/tests/functional/git-hashing/simple.sh b/tests/functional/git-hashing/simple.sh new file mode 100644 index 000000000..74b0220f8 --- /dev/null +++ b/tests/functional/git-hashing/simple.sh @@ -0,0 +1,58 @@ +source common.sh + +repo="$TEST_ROOT/scratch" +git init "$repo" + +git -C "$repo" config user.email "you@example.com" +git -C "$repo" config user.name "Your Name" + +try () { + hash=$(nix hash path --mode git --format base16 --algo sha1 $TEST_ROOT/hash-path) + [[ "$hash" == "$1" ]] + + git -C "$repo" rm -rf hash-path || true + cp -r "$TEST_ROOT/hash-path" "$TEST_ROOT/scratch/hash-path" + git -C "$repo" add hash-path + git -C "$repo" commit -m "x" + git -C "$repo" status + hash2=$(git -C "$TEST_ROOT/scratch" rev-parse HEAD:hash-path) + [[ "$hash2" = "$1" ]] +} + +# blob +rm -rf $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path +try "557db03de997c86a4a028e1ebd3a1ceb225be238" + +# tree with children +rm -rf $TEST_ROOT/hash-path +mkdir $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path/hello +echo "Run Hello World" > $TEST_ROOT/hash-path/executable +chmod +x $TEST_ROOT/hash-path/executable +try "e5c0a11a556801a5c9dcf330ca9d7e2c572697f4" + +rm -rf $TEST_ROOT/dummy1 +echo Hello World! > $TEST_ROOT/dummy1 +path1=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy1) +hash1=$(nix-store -q --hash $path1) +test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v" + +rm -rf $TEST_ROOT/dummy2 +mkdir -p $TEST_ROOT/dummy2 +echo Hello World! > $TEST_ROOT/dummy2/hello +path2=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy2) +hash2=$(nix-store -q --hash $path2) +test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0" + +rm -rf $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3/dir +touch $TEST_ROOT/dummy3/dir/file +echo Hello World! > $TEST_ROOT/dummy3/dir/file +touch $TEST_ROOT/dummy3/dir/executable +chmod +x $TEST_ROOT/dummy3/dir/executable +echo Run Hello World! 
> $TEST_ROOT/dummy3/dir/executable
+path3=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy3)
+hash3=$(nix-store -q --hash $path3)
+test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv"
diff --git a/tests/unit/libstore/content-address.cc b/tests/unit/libstore/content-address.cc
index 98c1eace3..cc1c7fcc6 100644
--- a/tests/unit/libstore/content-address.cc
+++ b/tests/unit/libstore/content-address.cc
@@ -13,6 +13,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_1) {
         ContentAddressMethod { TextIngestionMethod {} },
         ContentAddressMethod { FileIngestionMethod::Flat },
         ContentAddressMethod { FileIngestionMethod::Recursive },
+        ContentAddressMethod { FileIngestionMethod::Git },
     }) {
         EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam);
     }
@@ -23,6 +24,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_2) {
         "text",
         "flat",
         "nar",
+        "git",
     }) {
         EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS);
     }
diff --git a/tests/unit/libutil/file-content-address.cc b/tests/unit/libutil/file-content-address.cc
index 2e819ce40..294e39806 100644
--- a/tests/unit/libutil/file-content-address.cc
+++ b/tests/unit/libutil/file-content-address.cc
@@ -4,6 +4,32 @@
 
 namespace nix {
 
+/* ----------------------------------------------------------------------------
+ * parseFileSerialisationMethod, renderFileSerialisationMethod
+ * --------------------------------------------------------------------------*/
+
+TEST(FileSerialisationMethod, testRoundTripPrintParse_1) {
+    for (const FileSerialisationMethod fim : {
+        FileSerialisationMethod::Flat,
+        FileSerialisationMethod::Recursive,
+    }) {
+        EXPECT_EQ(parseFileSerialisationMethod(renderFileSerialisationMethod(fim)), fim);
+    }
+}
+
+TEST(FileSerialisationMethod, testRoundTripPrintParse_2) {
+    for (const std::string_view fimS : {
+        "flat",
+        "nar",
+    }) {
+        EXPECT_EQ(renderFileSerialisationMethod(parseFileSerialisationMethod(fimS)), fimS);
+    }
+}
+
+TEST(FileSerialisationMethod, testParseFileSerialisationMethodOptException) {
+    EXPECT_THROW(parseFileSerialisationMethod("narwhal"), UsageError);
+}
+
 /* ----------------------------------------------------------------------------
  * parseFileIngestionMethod, renderFileIngestionMethod
  * --------------------------------------------------------------------------*/
@@ -12,6 +38,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_1) {
     for (const FileIngestionMethod fim : {
         FileIngestionMethod::Flat,
         FileIngestionMethod::Recursive,
+        FileIngestionMethod::Git,
     }) {
         EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim);
     }
@@ -21,6 +48,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_2) {
     for (const std::string_view fimS : {
         "flat",
         "nar",
+        "git",
     }) {
         EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS);
     }

From d4ad1fcf303f6f34ebb30a82ebe6f99c26bef8cb Mon Sep 17 00:00:00 2001
From: John Ericson
Date: Thu, 18 Jan 2024 23:57:26 -0500
Subject: [PATCH 3/3] Avoid creating temporary store object for git over the wire

Instead, serialize as NAR and send that over, then rehash server side.
This is algorithmically simpler, but comes at the cost of a new
parameter to `Store::addToStoreFromDump`.
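In rough outline, the mapping that the `switch` statements added below in
`daemon.cc`, `remote-store.cc`, and `store-api.cc` perform (a sketch, using
the names introduced in this patch):

```c++
// Every ingestion method is shipped in some serialisation format.
// Git ingestion is not itself a serialisation method, so it falls
// back to NAR, and the Git hash is recomputed from the restored
// path on the receiving side.
FileSerialisationMethod dumpMethod;
switch (hashMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
    dumpMethod = FileSerialisationMethod::Flat;
    break;
case FileIngestionMethod::Recursive:
    dumpMethod = FileSerialisationMethod::Recursive;
    break;
case FileIngestionMethod::Git:
    // Use NAR; Git is not a serialisation method
    dumpMethod = FileSerialisationMethod::Recursive;
    break;
}
```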
Co-authored-by: Eelco Dolstra --- src/libexpr/primops.cc | 2 +- src/libstore/binary-cache-store.cc | 27 +++++--- src/libstore/binary-cache-store.hh | 3 +- src/libstore/build/local-derivation-goal.cc | 5 +- src/libstore/daemon.cc | 59 ++++++++-------- src/libstore/derivations.cc | 2 +- src/libstore/dummy-store.cc | 3 +- src/libstore/legacy-ssh-store.hh | 3 +- src/libstore/local-store.cc | 77 ++++++++------------- src/libstore/local-store.hh | 3 +- src/libstore/remote-store.cc | 20 +++++- src/libstore/remote-store.hh | 3 +- src/libstore/store-api.cc | 47 ++++--------- src/libstore/store-api.hh | 17 +++-- src/nix-env/user-env.cc | 2 +- src/nix/develop.cc | 2 +- 16 files changed, 137 insertions(+), 138 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 9ea266cf9..78f7f71ed 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -2092,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val }) : ({ StringSource s { contents }; - state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); + state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); }); /* Note: we don't need to add `context' to the context of the diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index d6047dd7e..bea2bb370 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -305,7 +305,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource StorePath BinaryCacheStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -313,17 +314,26 @@ StorePath BinaryCacheStore::addToStoreFromDump( std::optional caHash; std::string nar; + // Calculating Git hash from NAR stream not yet implemented. May not + // be possible to implement in single-pass if the NAR is in an + // inconvenient order. Could fetch after uploading, however. + if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git) + unsupported("addToStoreFromDump"); + if (auto * dump2p = dynamic_cast(&dump)) { auto & dump2 = *dump2p; // Hack, this gives us a "replayable" source so we can compute // multiple hashes more easily. - caHash = hashString(HashAlgorithm::SHA256, dump2.s); - switch (method.getFileIngestionMethod()) { - case FileIngestionMethod::Recursive: + // + // Only calculate if the dump is in the right format, however. + if (static_cast(dumpMethod) == hashMethod.getFileIngestionMethod()) + caHash = hashString(HashAlgorithm::SHA256, dump2.s); + switch (dumpMethod) { + case FileSerialisationMethod::Recursive: // The dump is already NAR in this case, just use it. nar = dump2.s; break; - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: { // The dump is Flat, so we need to convert it to NAR with a // single file. @@ -332,14 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = std::move(s.s); break; } - case FileIngestionMethod::Git: - unsupported("addToStoreFromDump"); - break; } } else { // Otherwise, we have to do th same hashing as NAR so our single // hash will suffice for both purposes. 
- if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) unsupported("addToStoreFromDump"); } StringSource narDump { nar }; @@ -354,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump( *this, name, ContentAddressWithReferences::fromParts( - method, + hashMethod, caHash ? *caHash : nar.first, { .others = references, diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 76de2d11a..7c2828309 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -125,7 +125,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index d92966a74..a9b8de123 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -1312,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override { - auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair); + auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair); goal.addDependency(path); return path; } diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 873065e14..e1337f51d 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -401,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto pathInfo = [&]() { // NB: FramedSource must be out of scope before logger->stopWork(); - auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr); - auto hashAlgo = hashAlgo_; // work around clang bug + auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr); FramedSource source(from); + FileSerialisationMethod dumpMethod; + switch (contentAddressMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + dumpMethod = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + dumpMethod = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + dumpMethod = FileSerialisationMethod::Recursive; + break; + } // TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store. - auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair); + auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair); return store->queryPathInfo(path); }(); logger->stopWork(); @@ -431,34 +443,23 @@ static void performOp(TunnelLogger * logger, ref store, hashAlgo = parseHashAlgo(hashAlgoRaw); } + // Old protocol always sends NAR, regardless of hashing method auto dumpSource = sinkToSource([&](Sink & saved) { - if (method == FileIngestionMethod::Recursive) { - /* We parse the NAR dump through into `saved` unmodified, - so why all this extra work? 
We still parse the NAR so - that we aren't sending arbitrary data to `saved` - unwittingly`, and we know when the NAR ends so we don't - consume the rest of `from` and can't parse another - command. (We don't trust `addToStoreFromDump` to not - eagerly consume the entire stream it's given, past the - length of the Nar. */ - TeeSource savedNARSource(from, saved); - NullFileSystemObjectSink sink; /* just parse the NAR */ - parseDump(sink, savedNARSource); - } else if (method == FileIngestionMethod::Flat) { - /* Incrementally parse the NAR file, stripping the - metadata, and streaming the sole file we expect into - `saved`. */ - RegularFileSink savedRegular { saved }; - parseDump(savedRegular, from); - if (!savedRegular.regular) throw Error("regular file expected"); - } else { - /* Should have validated above that no other file ingestion - method was used. */ - assert(false); - } + /* We parse the NAR dump through into `saved` unmodified, + so why all this extra work? We still parse the NAR so + that we aren't sending arbitrary data to `saved` + unwittingly`, and we know when the NAR ends so we don't + consume the rest of `from` and can't parse another + command. (We don't trust `addToStoreFromDump` to not + eagerly consume the entire stream it's given, past the + length of the Nar. */ + TeeSource savedNARSource(from, saved); + NullFileSystemObjectSink sink; /* just parse the NAR */ + parseDump(sink, savedNARSource); }); logger->startWork(); - auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo); + auto path = store->addToStoreFromDump( + *dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo); logger->stopWork(); to << store->printStorePath(path); @@ -490,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto path = ({ StringSource source { s }; - store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); + store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); }); logger->stopWork(); to << store->printStorePath(path); diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 305ed5b42..df14e979f 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store, }) : ({ StringSource s { contents }; - store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); + store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); }); } diff --git a/src/libstore/dummy-store.cc b/src/libstore/dummy-store.cc index e4f13b8f4..30f23cff9 100644 --- a/src/libstore/dummy-store.cc +++ b/src/libstore/dummy-store.cc @@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/legacy-ssh-store.hh b/src/libstore/legacy-ssh-store.hh index ae890177b..ca2f115d2 100644 --- a/src/libstore/legacy-ssh-store.hh +++ 
b/src/libstore/legacy-ssh-store.hh @@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 5f35cf3a8..56f8c5dd8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1148,7 +1148,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, StorePath LocalStore::addToStoreFromDump( Source & source0, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -1201,7 +1202,13 @@ StorePath LocalStore::addToStoreFromDump( Path tempDir; AutoCloseFD tempDirFd; - if (!inMemory) { + bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod; + + /* If the methods don't match, our streaming hash of the dump is the + wrong sort, and we need to rehash. */ + bool inMemoryAndDontNeedRestore = inMemory && methodsMatch; + + if (!inMemoryAndDontNeedRestore) { /* Drain what we pulled so far, and then keep on pulling */ StringSource dumpSource { dump }; ChainSource bothSource { dumpSource, source }; @@ -1210,40 +1217,23 @@ StorePath LocalStore::addToStoreFromDump( delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - restorePath(tempPath, bothSource, (FileSerialisationMethod) fim); - break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = tempPath; - auto accessor = getFSAccessor(); - git::restore(sink, bothSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } - } + restorePath(tempPath, bothSource, dumpMethod); dumpBuffer.reset(); dump = {}; } - auto [hash, size] = hashSink->finish(); + auto [dumpHash, size] = hashSink->finish(); + + PosixSourceAccessor accessor; auto desc = ContentAddressWithReferences::fromParts( - method, - hash, + hashMethod, + methodsMatch + ? dumpHash + : hashPath( + accessor, CanonPath { tempPath }, + hashMethod.getFileIngestionMethod(), hashAlgo), { .others = references, // caller is not capable of creating a self-reference, because this is content-addressed without modulus @@ -1269,32 +1259,19 @@ StorePath LocalStore::addToStoreFromDump( autoGC(); - if (inMemory) { + if (inMemoryAndDontNeedRestore) { StringSource dumpSource { dump }; /* Restore from the buffer in memory. 
*/ - auto fim = method.getFileIngestionMethod(); + auto fim = hashMethod.getFileIngestionMethod(); switch (fim) { case FileIngestionMethod::Flat: case FileIngestionMethod::Recursive: restorePath(realPath, dumpSource, (FileSerialisationMethod) fim); break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = realPath; - auto accessor = getFSAccessor(); - git::restore(sink, dumpSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } + case FileIngestionMethod::Git: + // doesn't correspond to serialization method, so + // this should be unreachable + assert(false); } } else { /* Move the temporary path we restored above. */ @@ -1303,8 +1280,8 @@ StorePath LocalStore::addToStoreFromDump( /* For computing the nar hash. In recursive SHA-256 mode, this is the same as the store hash, so no need to do it again. */ - auto narHash = std::pair { hash, size }; - if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { + auto narHash = std::pair { dumpHash, size }; + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { HashSink narSink { HashAlgorithm::SHA256 }; dumpPath(realPath, narSink); narHash = narSink.finish(); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ba56d3ead..7eff1d690 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -180,7 +180,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index 0cae84828..8dfe8adda 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -509,12 +509,28 @@ ref RemoteStore::addCAToStore( StorePath RemoteStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) { - return addCAToStore(dump, name, method, hashAlgo, references, repair)->path; + FileSerialisationMethod fsm; + switch (hashMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } + if (fsm != dumpMethod) + unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination"); + return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path; } diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index c51a21375..d630adc08 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -87,7 +87,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = 
HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index c44612ec5..4356296d4 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -197,40 +197,23 @@ StorePath Store::addToStore( PathFilter & filter, RepairFlag repair) { + FileSerialisationMethod fsm; + switch (method.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } auto source = sinkToSource([&](Sink & sink) { - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - { - dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter); - break; - } - case FileIngestionMethod::Git: - { - git::dump( - accessor, path, - sink, - // recursively add to store if path is a directory - [&](const CanonPath & path) -> git::TreeEntry { - auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair); - auto info = queryPathInfo(storePath); - assert(info->ca); - assert(info->ca->method == FileIngestionMethod::Git); - auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath))); - auto gitModeOpt = git::convertMode(stat.type); - assert(gitModeOpt); - return { - .mode = *gitModeOpt, - .hash = info->ca->hash, - }; - }, - filter); - break; - } - } + dumpPath(accessor, path, sink, fsm, filter); }); - return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); + return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); } void Store::addMultipleToStore( diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index 5163070b2..5f683a211 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -466,14 +466,23 @@ public: * in `dump`, which is either a NAR serialisation (if recursive == * true) or simply the contents of a regular file (if recursive == * false). - * `dump` may be drained * - * \todo remove? + * `dump` may be drained. + * + * @param dumpMethod What serialisation format is `dump`, i.e. how + * to deserialize it. Must either match hashMethod or be + * `FileSerialisationMethod::Recursive`. + * + * @param hashMethod How content addressing? Need not match be the + * same as `dumpMethod`. + * + * @todo remove? */ virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) = 0; @@ -772,7 +781,7 @@ protected: * Helper for methods that are not unsupported: this is used for * default definitions for virtual methods that are meant to be overriden. * - * \todo Using this should be a last resort. It is better to make + * @todo Using this should be a last resort. It is better to make * the method "virtual pure" and/or move it to a subclass. 
*/ [[noreturn]] void unsupported(const std::string & op) diff --git a/src/nix-env/user-env.cc b/src/nix-env/user-env.cc index 2f9c988d5..8bebe2b9e 100644 --- a/src/nix-env/user-env.cc +++ b/src/nix-env/user-env.cc @@ -113,7 +113,7 @@ bool createUserEnv(EvalState & state, PackageInfos & elems, std::string str2 = str.str(); StringSource source { str2 }; state.store->addToStoreFromDump( - source, "env-manifest.nix", TextIngestionMethod {}, HashAlgorithm::SHA256, references); + source, "env-manifest.nix", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references); }); /* Get the environment builder expression. */ diff --git a/src/nix/develop.cc b/src/nix/develop.cc index 403178a5d..c1842f2d5 100644 --- a/src/nix/develop.cc +++ b/src/nix/develop.cc @@ -226,7 +226,7 @@ static StorePath getDerivationEnvironment(ref store, ref evalStore auto getEnvShPath = ({ StringSource source { getEnvSh }; evalStore->addToStoreFromDump( - source, "get-env.sh", TextIngestionMethod {}, HashAlgorithm::SHA256, {}); + source, "get-env.sh", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, {}); }); drv.args = {store->printStorePath(getEnvShPath)};
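Usage sketch (not part of the patch): how a caller might exercise the new Git
ingestion through the public store API. This assumes only the signatures shown
in this series, that the trailing `addToStore` parameters keep their defaults,
and that the `git-hashing` experimental feature is enabled; the helper name and
store path name are hypothetical.

```c++
#include "store-api.hh"
#include "posix-source-accessor.hh"

using namespace nix;

// Hypothetical helper: content-address `path` with Git tree/blob hashing.
// Git ingestion must use SHA-1 (enforced by makeFixedOutputPath above);
// over the wire the object is still dumped as a NAR and rehashed on the
// receiving side, per this series.
StorePath addPathWithGitHashing(Store & store, const Path & path)
{
    auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
    return store.addToStore(
        "example",                 // store path name (hypothetical)
        accessor, canonPath,
        FileIngestionMethod::Git,  // ingestion method from this series
        HashAlgorithm::SHA1);      // Git objects are SHA-1 hashed
}
```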