diff --git a/perl/lib/Nix/Store.xs b/perl/lib/Nix/Store.xs index ee211ef64..e751c2be1 100644 --- a/perl/lib/Nix/Store.xs +++ b/perl/lib/Nix/Store.xs @@ -258,7 +258,7 @@ hashPath(char * algo, int base32, char * path) try { Hash h = hashPath( PosixSourceAccessor::createAtRoot(path), - FileIngestionMethod::Recursive, parseHashAlgo(algo)); + FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); } catch (Error & e) { diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 5153ca64f..67d00f364 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -453,7 +453,7 @@ StorePath BinaryCacheStore::addToStore( non-recursive+sha256 so we can just use the default implementation of this method in terms of addToStoreFromDump. */ - auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto source = sinkToSource([&](Sink & sink) { path.dumpPath(sink, filter); diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 108933422..dc53a07f1 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -1262,6 +1262,16 @@ public: store paths of the latest Nix release. )" }; + + Setting<uint64_t> largePathWarningThreshold{ + this, + std::numeric_limits<uint64_t>::max(), + "large-path-warning-threshold", + R"( + Warn when copying a path larger than this number of bytes to the Nix store + (as determined by its NAR serialisation). + )" + }; }; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 800e69309..33c4d7372 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1272,7 +1272,7 @@ StorePath LocalStore::addToStoreFromDump( ? 
dumpHash : hashPath( PosixSourceAccessor::createAtRoot(tempPath), - hashMethod.getFileIngestionMethod(), hashAlgo), + hashMethod.getFileIngestionMethod(), hashAlgo).first, { .others = references, // caller is not capable of creating a self-reference, because this is content-addressed without modulus @@ -1412,7 +1412,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) PosixSourceAccessor accessor; std::string hash = hashPath( PosixSourceAccessor::createAtRoot(link.path()), - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false); + FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); if (hash != name.string()) { printError("link '%s' was modified! expected hash '%s', got '%s'", link.path(), name, hash); diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 419c55e92..a2095e02e 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -169,7 +169,9 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath( const StorePathSet & references, PathFilter & filter) const { - auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + auto [h, size] = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + if (size && *size >= settings.largePathWarningThreshold) + warn("hashed large path '%s' (%d bytes)", path, *size); return { makeFixedOutputPathFromCA( name, @@ -210,7 +212,11 @@ StorePath Store::addToStore( auto source = sinkToSource([&](Sink & sink) { dumpPath(path, sink, fsm, filter); }); - return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); + LengthSource lengthSource(*source); + auto storePath = addToStoreFromDump(lengthSource, name, fsm, method, hashAlgo, references, repair); + if (lengthSource.total >= settings.largePathWarningThreshold) + warn("copied large path '%s' to the store (%d bytes)", path, lengthSource.total); + return storePath; } void Store::addMultipleToStore( diff 
--git a/src/libstore/unix/build/worker.cc b/src/libstore/unix/build/worker.cc index 03fc280a4..2cca06213 100644 --- a/src/libstore/unix/build/worker.cc +++ b/src/libstore/unix/build/worker.cc @@ -529,9 +529,9 @@ bool Worker::pathContentsGood(const StorePath & path) if (!pathExists(store.printStorePath(path))) res = false; else { - Hash current = hashPath( + auto current = hashPath( {store.getFSAccessor(), CanonPath(store.printStorePath(path))}, - FileIngestionMethod::Recursive, info->narHash.algo); + FileIngestionMethod::Recursive, info->narHash.algo).first; Hash nullHash(HashAlgorithm::SHA256); res = info->narHash == nullHash || info->narHash == current; } diff --git a/src/libutil/file-content-address.cc b/src/libutil/file-content-address.cc index 769042d00..8b1e3117a 100644 --- a/src/libutil/file-content-address.cc +++ b/src/libutil/file-content-address.cc @@ -112,17 +112,19 @@ HashResult hashPath( } -Hash hashPath( +std::pair<Hash, std::optional<uint64_t>> hashPath( const SourcePath & path, FileIngestionMethod method, HashAlgorithm ht, PathFilter & filter) { switch (method) { case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - return hashPath(path, (FileSerialisationMethod) method, ht, filter).first; + case FileIngestionMethod::Recursive: { + auto res = hashPath(path, (FileSerialisationMethod) method, ht, filter); + return {res.first, {res.second}}; + } case FileIngestionMethod::Git: - return git::dumpHash(ht, path, filter).hash; + return {git::dumpHash(ht, path, filter).hash, std::nullopt}; } assert(false); } diff --git a/src/libutil/file-content-address.hh b/src/libutil/file-content-address.hh index 145a8fb1f..cd63be551 100644 --- a/src/libutil/file-content-address.hh +++ b/src/libutil/file-content-address.hh @@ -132,14 +132,15 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method); /** * Compute the hash of the given file system object according to the - * given method. 
+ * given method, and for some ingestion methods, the size of the + * serialisation. * * Unlike the other `hashPath`, this works on an arbitrary * `FileIngestionMethod` instead of `FileSerialisationMethod`, but - * doesn't return the size as this is this is not a both simple and + * may not return the size as this is not both simple and * useful defined for a merkle format. */ -Hash hashPath( +std::pair<Hash, std::optional<uint64_t>> hashPath( const SourcePath & path, FileIngestionMethod method, HashAlgorithm ha, PathFilter & filter = defaultPathFilter); diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 6249ddaf5..18f4a79c3 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -283,6 +283,26 @@ struct LengthSink : Sink } }; +/** + * A wrapper source that counts the number of bytes read from it. + */ +struct LengthSource : Source +{ + Source & next; + + LengthSource(Source & next) : next(next) + { } + + uint64_t total = 0; + + size_t read(char * data, size_t len) override + { + auto n = next.read(data, len); + total += n; + return n; + } +}; + /** * Convert a function into a sink. */