From e0012b97abb4c97ccf7fb20299d7b62dd906e89d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 29 Jul 2024 14:26:25 +0200 Subject: [PATCH] Split tarball-specific logic from GitFileSystemObjectSink --- src/libfetchers/git-utils.cc | 33 ++++++++++++++++------------- src/libfetchers/git-utils.hh | 11 ++++++++++ src/libfetchers/github.cc | 5 +++-- src/libfetchers/tarball.cc | 6 ++++-- tests/unit/libfetchers/git-utils.cc | 2 +- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 032d8e0bd..c2d4fe6aa 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -486,6 +486,24 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return narHash; } + + Hash dereferenceSingletonDirectory(const Hash & oid_) override + { + auto oid = hashToOID(oid_); + + /* If the tree contains a single entry that is a directory or a non-executable regular file, return that entry's ID instead of the tree's. */ + auto _tree = lookupObject(*this, oid, GIT_OBJECT_TREE); + auto tree = (const git_tree *) &*_tree; + + if (git_tree_entrycount(tree) == 1) { + auto entry = git_tree_entry_byindex(tree, 0); + auto mode = git_tree_entry_filemode(entry); + if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_TREE) + oid = *git_tree_entry_id(entry); + } + + return toHash(oid); + } }; ref GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare) @@ -991,21 +1009,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink auto [oid, _name] = popBuilder(); - /* If the root directory contains a single entry that is a - directory or a non-executable regular file, return that as - the top-level object. We don't do this for executables - because they don't have a tree hash in the Git object - model. 
*/ - auto _tree = lookupObject(*repo, oid, GIT_OBJECT_TREE); - auto tree = (const git_tree *) &*_tree; - - if (git_tree_entrycount(tree) == 1) { - auto entry = git_tree_entry_byindex(tree, 0); - auto mode = git_tree_entry_filemode(entry); - if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_TREE) - oid = *git_tree_entry_id(entry); - } - return toHash(oid); } }; diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 495916f62..644f22a07 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -98,6 +98,17 @@ struct GitRepo * serialisation. This is memoised on-disk. */ virtual Hash treeHashToNarHash(const Hash & treeHash) = 0; + + /** + * If the specified Git object is a directory with a single entry + * that is a directory or a non-executable regular file, return + * the ID of that entry; otherwise return `oid` unchanged. + * + * Note: We don't do this for executable files because they don't + * have a tree hash in the Git object model that distinguishes + * them from non-executable files. 
+ */ + virtual Hash dereferenceSingletonDirectory(const Hash & oid) = 0; }; ref getTarballCache(); diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 2968d2df2..a2ac9247a 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -255,11 +255,12 @@ struct GitArchiveInputScheme : InputScheme }); TarArchive archive { *source }; - auto parseSink = getTarballCache()->getFileSystemObjectSink(); + auto tarballCache = getTarballCache(); + auto parseSink = tarballCache->getFileSystemObjectSink(); auto lastModified = unpackTarfileToSink(archive, *parseSink); TarballInfo tarballInfo { - .treeHash = parseSink->sync(), + .treeHash = tarballCache->dereferenceSingletonDirectory(parseSink->sync()), .lastModified = lastModified }; diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 55db3eafb..b09f628a4 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -164,7 +164,8 @@ DownloadTarballResult downloadTarball( TarArchive{path}; }) : TarArchive{*source}; - auto parseSink = getTarballCache()->getFileSystemObjectSink(); + auto tarballCache = getTarballCache(); + auto parseSink = tarballCache->getFileSystemObjectSink(); auto lastModified = unpackTarfileToSink(archive, *parseSink); auto res(_res->lock()); @@ -177,7 +178,8 @@ DownloadTarballResult downloadTarball( infoAttrs = cached->value; } else { infoAttrs.insert_or_assign("etag", res->etag); - infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev()); + infoAttrs.insert_or_assign("treeHash", + tarballCache->dereferenceSingletonDirectory(parseSink->sync()).gitRev()); infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified)); if (res->immutableUrl) infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl); diff --git a/tests/unit/libfetchers/git-utils.cc b/tests/unit/libfetchers/git-utils.cc index f0d38d50c..de5110cc3 100644 --- a/tests/unit/libfetchers/git-utils.cc +++ b/tests/unit/libfetchers/git-utils.cc @@ -77,7 +77,7 @@ 
TEST_F(GitUtilsTest, sink_basic) // sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello")); - auto result = sink->sync(); + auto result = repo->dereferenceSingletonDirectory(sink->sync()); auto accessor = repo->getAccessor(result, false); auto entries = accessor->readDirectory(CanonPath::root); ASSERT_EQ(entries.size(), 5);