mirror of
https://github.com/NixOS/nix
synced 2024-10-18 00:16:11 -04:00
Merge ba417a2172
into d5c45952ac
This commit is contained in:
commit
23405e7178
398
src/libfetchers/git-lfs-fetch.hh
Normal file
398
src/libfetchers/git-lfs-fetch.hh
Normal file
|
@ -0,0 +1,398 @@
|
|||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <curl/curl.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <git2.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace lfs {
|
||||
|
||||
// git-lfs metadata about a file
|
||||
struct Md {
|
||||
std::string path; // fs path relative to repo root, no ./ prefix
|
||||
std::string oid; // git-lfs managed object id. you give this to the lfs server
|
||||
// for downloads
|
||||
size_t size; // in bytes
|
||||
};
|
||||
|
||||
std::string exec_command(const std::string &cmd) {
|
||||
std::cout << cmd << std::endl;
|
||||
std::string data;
|
||||
std::array<char, 256> buffer;
|
||||
std::string result;
|
||||
std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd.c_str(), "r"),
|
||||
pclose);
|
||||
if (!pipe) {
|
||||
throw std::runtime_error("popen() failed!");
|
||||
}
|
||||
while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
|
||||
result += buffer.data();
|
||||
}
|
||||
if (result.back() == '\n') {
|
||||
result.pop_back();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string get_lfs_api_token(const std::string &host,
|
||||
const std::string &path) {
|
||||
std::string command =
|
||||
"ssh git@" + host + " git-lfs-authenticate " + path + " download";
|
||||
std::string output = exec_command(command);
|
||||
std::string res;
|
||||
|
||||
if (!output.empty()) {
|
||||
nlohmann::json query_resp = nlohmann::json::parse(output);
|
||||
res = query_resp["header"]["Authorization"].get<std::string>();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
std::tuple<std::string, std::string, std::string>
|
||||
get_lfs_endpoint_url(git_repository *repo) {
|
||||
int err;
|
||||
git_config *config = nullptr;
|
||||
// using git_repository_config instead of _snapshot makes the
|
||||
// git_config_get_string call fail why????
|
||||
if (git_repository_config_snapshot(&config, repo) < 0) {
|
||||
// Handle error if necessary
|
||||
git_config_free(config);
|
||||
std::cerr << "no config" << std::endl;
|
||||
return {"", "", ""};
|
||||
}
|
||||
|
||||
const char *url_c_str;
|
||||
err = git_config_get_string(&url_c_str, config, "remote.origin.url");
|
||||
if (err < 0) {
|
||||
// Handle error if necessary
|
||||
std::cerr << "no remote.origin.url: " << err << std::endl;
|
||||
git_config_free(config);
|
||||
return {"", "", ""};
|
||||
}
|
||||
std::string url = std::string(url_c_str);
|
||||
std::cerr << "url_c_str: " << url_c_str << std::endl;
|
||||
|
||||
if (url.back() == '/') {
|
||||
url.pop_back();
|
||||
}
|
||||
|
||||
// idk what this was for man
|
||||
// if (url.compare(url.length() - 4, 4, ".git") != 0) {
|
||||
// url += "/info/lfs";
|
||||
//} else {
|
||||
// url += ".git/info/lfs";
|
||||
//}
|
||||
|
||||
// Parse the URL
|
||||
std::string scheme, host, path;
|
||||
if (url.find("https://") != 0) {
|
||||
size_t at_pos = url.find('@');
|
||||
if (at_pos != std::string::npos) {
|
||||
host = url.substr(at_pos + 1);
|
||||
size_t colon_pos = host.find(':');
|
||||
path = host.substr(colon_pos + 1);
|
||||
host = host.substr(0, colon_pos);
|
||||
scheme = "https";
|
||||
url = scheme + "://" + host + "/" + path;
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_tuple(url, host, path);
|
||||
}
|
||||
|
||||
std::string git_attr_value_to_string(git_attr_value_t value) {
|
||||
switch (value) {
|
||||
case GIT_ATTR_VALUE_UNSPECIFIED:
|
||||
return "GIT_ATTR_VALUE_UNSPECIFIED";
|
||||
case GIT_ATTR_VALUE_TRUE:
|
||||
return "GIT_ATTR_VALUE_TRUE";
|
||||
case GIT_ATTR_VALUE_FALSE:
|
||||
return "GIT_ATTR_VALUE_FALSE";
|
||||
case GIT_ATTR_VALUE_STRING:
|
||||
return "GIT_ATTR_VALUE_STRING";
|
||||
default:
|
||||
return "Unknown value";
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<std::vector<std::string>> find_lfs_files(git_repository *repo) {
|
||||
git_index *index;
|
||||
int error;
|
||||
error = git_repository_index(&index, repo);
|
||||
if (error < 0) {
|
||||
const git_error *e = git_error_last();
|
||||
std::cerr << "Error reading index: " << e->message << std::endl;
|
||||
git_repository_free(repo);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<std::vector<std::string>> out =
|
||||
std::make_unique<std::vector<std::string>>();
|
||||
size_t entry_count = git_index_entrycount(index);
|
||||
for (size_t i = 0; i < entry_count; ++i) {
|
||||
const git_index_entry *entry = git_index_get_byindex(index, i);
|
||||
if (entry) {
|
||||
const char *value;
|
||||
if (git_attr_get(&value, repo, GIT_ATTR_CHECK_INDEX_ONLY, entry->path,
|
||||
"filter") == 0) {
|
||||
auto value_type = git_attr_value(value);
|
||||
if (value_type == GIT_ATTR_VALUE_STRING && strcmp(value, "lfs") == 0) {
|
||||
out->push_back(entry->path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
std::string get_obj_content(git_repository *repo, std::string path) {
|
||||
// Get the HEAD commit
|
||||
git_object *obj = nullptr;
|
||||
if (git_revparse_single(&obj, repo, "HEAD^{commit}") != 0) {
|
||||
std::cerr << "Failed to find HEAD" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
git_commit *commit = nullptr;
|
||||
git_commit_lookup(&commit, repo, git_object_id(obj));
|
||||
git_object_free(obj);
|
||||
|
||||
git_tree *tree = nullptr;
|
||||
if (git_commit_tree(&tree, commit) != 0) {
|
||||
std::cerr << "Failed to get tree from commit" << std::endl;
|
||||
git_commit_free(commit);
|
||||
return "";
|
||||
}
|
||||
|
||||
git_tree_entry *entry;
|
||||
if (git_tree_entry_bypath(&entry, tree, path.c_str()) != 0) {
|
||||
std::cerr << "Failed to find " << path << " in the tree" << std::endl;
|
||||
git_tree_free(tree);
|
||||
git_commit_free(commit);
|
||||
return "";
|
||||
}
|
||||
|
||||
git_blob *blob = nullptr;
|
||||
if (git_blob_lookup(&blob, repo, git_tree_entry_id(entry)) != 0) {
|
||||
std::cerr << "Failed to lookup blob" << std::endl;
|
||||
git_tree_entry_free(
|
||||
const_cast<git_tree_entry *>(entry)); // Free entry after use
|
||||
git_tree_free(tree);
|
||||
git_commit_free(commit);
|
||||
return "";
|
||||
}
|
||||
|
||||
auto content = git_blob_rawcontent(blob);
|
||||
git_object_free(obj);
|
||||
git_tree_free(tree);
|
||||
git_commit_free(commit);
|
||||
|
||||
// is this copy elided? i wont pretend to understand
|
||||
// https://en.cppreference.com/w/cpp/language/copy_elision
|
||||
return std::string(static_cast<const char *>(content));
|
||||
}
|
||||
|
||||
std::tuple<std::string, size_t> parse_lfs_metadata(const std::string &content) {
|
||||
std::istringstream iss(content);
|
||||
std::string line;
|
||||
std::string oid;
|
||||
size_t size = 0;
|
||||
|
||||
while (getline(iss, line)) {
|
||||
std::size_t pos = line.find("oid sha256:");
|
||||
if (pos != std::string::npos) {
|
||||
oid = line.substr(pos + 11); // Extract hash after "oid sha256:"
|
||||
continue;
|
||||
}
|
||||
|
||||
pos = line.find("size ");
|
||||
if (pos != std::string::npos) {
|
||||
std::string sizeStr =
|
||||
line.substr(pos + 5); // Extract size number after "size "
|
||||
size = std::stol(sizeStr);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_tuple(oid, size);
|
||||
}
|
||||
|
||||
// path, oid, size
|
||||
std::vector<Md> parse_lfs_files(git_repository *repo) {
|
||||
const auto files = find_lfs_files(repo);
|
||||
std::vector<Md> out;
|
||||
for (const auto &file : *files) {
|
||||
std::cerr << file;
|
||||
auto content = get_obj_content(repo, file);
|
||||
auto [oid, size] = parse_lfs_metadata(content);
|
||||
out.push_back(Md{file, oid, size});
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
|
||||
std::string *s) {
|
||||
size_t newLength = size * nmemb;
|
||||
try {
|
||||
s->append((char *)contents, newLength);
|
||||
} catch (std::bad_alloc &e) {
|
||||
// Handle memory bad_alloc error
|
||||
return 0;
|
||||
}
|
||||
return newLength;
|
||||
}
|
||||
|
||||
nlohmann::json oids_to_payload(const std::vector<Md> &items) {
|
||||
nlohmann::json j_array = nlohmann::json::array();
|
||||
for (const auto &md : items) {
|
||||
j_array.push_back({{"oid", md.oid}, {"size", md.size}});
|
||||
}
|
||||
return j_array;
|
||||
}
|
||||
|
||||
std::vector<nlohmann::json> fetch_urls(const std::string &lfs_url,
|
||||
const std::string &token,
|
||||
const std::vector<Md> &metadatas) {
|
||||
std::vector<nlohmann::json> objects;
|
||||
|
||||
nlohmann::json oid_list = oids_to_payload(metadatas);
|
||||
nlohmann::json data = {
|
||||
{"operation", "download"},
|
||||
};
|
||||
data["objects"] = oid_list;
|
||||
auto data_str = data.dump();
|
||||
std::cerr << "data_str: " + data_str << std::endl;
|
||||
|
||||
CURL *curl = curl_easy_init();
|
||||
std::string response_string;
|
||||
std::string header_string;
|
||||
auto lfs_url_batch = lfs_url + "/info/lfs/objects/batch";
|
||||
curl_easy_setopt(curl, CURLOPT_URL, lfs_url_batch.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data_str.c_str());
|
||||
|
||||
// Set headers
|
||||
struct curl_slist *headers = NULL;
|
||||
auto auth_header = "Authorization: " + token;
|
||||
headers = curl_slist_append(headers, auth_header.c_str());
|
||||
headers =
|
||||
curl_slist_append(headers, "Content-Type: application/vnd.git-lfs+json");
|
||||
headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
// Check for errors
|
||||
if (res != CURLE_OK)
|
||||
fprintf(stderr, "curl_easy_perform() failed: %s\n",
|
||||
curl_easy_strerror(res));
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
try {
|
||||
std::cerr << "resp: " << response_string << std::endl;
|
||||
// example resp here:
|
||||
// {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic
|
||||
// Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]}
|
||||
auto resp = nlohmann::json::parse(response_string);
|
||||
if (resp.contains("objects")) {
|
||||
objects.insert(objects.end(), resp["objects"].begin(),
|
||||
resp["objects"].end());
|
||||
} else {
|
||||
throw std::runtime_error("Response does not contain 'objects'");
|
||||
}
|
||||
} catch (std::exception &e) {
|
||||
std::cerr << "Failed to parse JSON or invalid response: " << e.what()
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return objects;
|
||||
}
|
||||
|
||||
static size_t WriteData(void *ptr, size_t size, size_t nmemb, void *stream) {
|
||||
size_t written = fwrite(ptr, size, nmemb, (FILE *)stream);
|
||||
return written;
|
||||
}
|
||||
|
||||
void download_file(const std::string &url, const std::string &auth_header,
|
||||
const std::string &output_filename) {
|
||||
CURL *curl;
|
||||
FILE *fp;
|
||||
CURLcode res;
|
||||
|
||||
curl = curl_easy_init();
|
||||
if (curl) {
|
||||
fp = fopen(output_filename.c_str(), "wb");
|
||||
if (fp == nullptr) {
|
||||
std::cerr << "Failed to open file for writing: " << output_filename
|
||||
<< std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteData);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
|
||||
struct curl_slist *headers = nullptr;
|
||||
const std::string auth_header_prepend = "Authorization: " + auth_header;
|
||||
headers = curl_slist_append(headers, auth_header_prepend.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
res = curl_easy_perform(curl);
|
||||
if (res != CURLE_OK) {
|
||||
std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res)
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
curl_slist_free_all(headers);
|
||||
curl_easy_cleanup(curl);
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
void download_files(nlohmann::json objects, std::string dir) {
|
||||
for (auto &obj : objects) {
|
||||
std::string oid = obj["oid"];
|
||||
std::string url = obj["actions"]["download"]["href"];
|
||||
std::string auth_header =
|
||||
obj["actions"]["download"]["header"]["Authorization"];
|
||||
download_file(url, auth_header, dir + "/" + oid);
|
||||
}
|
||||
}
|
||||
|
||||
// moves files from temporary download dir to final location
|
||||
void move_files(const std::vector<Md> &metadata,
|
||||
const std::string &sourceDir, const std::string &repoRoot) {
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
for (const auto &md : metadata) {
|
||||
fs::path srcFile =
|
||||
fs::path(sourceDir) / md.oid; // Construct the source file path
|
||||
fs::path destFile =
|
||||
fs::path(repoRoot) / md.path; // Construct the destination file path
|
||||
|
||||
// Move the file
|
||||
try {
|
||||
fs::rename(srcFile, destFile);
|
||||
} catch (const fs::filesystem_error &e) {
|
||||
std::cerr << "Error moving file " << srcFile << " to " << destFile << ": "
|
||||
<< e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace lfs
|
|
@ -1,4 +1,5 @@
|
|||
#include "git-utils.hh"
|
||||
#include "git-lfs-fetch.hh"
|
||||
#include "cache.hh"
|
||||
#include "finally.hh"
|
||||
#include "processes.hh"
|
||||
|
@ -659,6 +660,17 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
|
|||
/**
|
||||
* Raw git tree input accessor.
|
||||
*/
|
||||
|
||||
static int attr_callback(const char *name, const char *value, void *payload) {
|
||||
warn("got an attribute! it's %s = %s", name, value);
|
||||
return 0;
|
||||
//// Check if the attribute is a filter attribute
|
||||
//if (strncmp(name, "filter.", 7) == 0) {
|
||||
// printf("Filter attribute: %s\n", name);
|
||||
//}
|
||||
//return 0; // Continue iterating
|
||||
}
|
||||
|
||||
struct GitSourceAccessor : SourceAccessor
|
||||
{
|
||||
ref<GitRepoImpl> repo;
|
||||
|
@ -674,6 +686,15 @@ struct GitSourceAccessor : SourceAccessor
|
|||
{
|
||||
auto blob = getBlob(path, symlink);
|
||||
|
||||
int error;
|
||||
// read filters here, perform smudge
|
||||
|
||||
// TODO: fix git_attr_foreach here, it can't seem to parse `.gitattributes` here even though it should
|
||||
warn("on path %s", path.abs().c_str());
|
||||
if ((error = git_attr_foreach(&(*(*repo).repo), GIT_ATTR_CHECK_INCLUDE_HEAD, path.rel_c_str(), attr_callback, NULL)) < 0) {
|
||||
warn("git_attr_foreach: %s", git_error_last()->message);
|
||||
}
|
||||
|
||||
auto data = std::string_view((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get()));
|
||||
|
||||
return std::string(data);
|
||||
|
@ -1251,6 +1272,20 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
|
|||
return result;
|
||||
}
|
||||
|
||||
void GitRepoImpl::smudgeLfs() {
|
||||
const auto metadatas = lfs::parse_lfs_files(this->repo);
|
||||
const auto [url, host, path] = lfs::get_lfs_endpoint_url(this->repo);
|
||||
// TODO: handle public lfs repos without ssh?
|
||||
const auto token = lfs::get_lfs_api_token(host, path);
|
||||
auto urls = lfs::fetch_urls(url, token, metadatas);
|
||||
std::cerr << "Got urls! ";
|
||||
for (const auto &url : urls) {
|
||||
std::cerr << url << std::endl;
|
||||
}
|
||||
std::cerr << "Fetching actual data" << std::endl;
|
||||
lfs::download_files(urls, this->path);
|
||||
}
|
||||
|
||||
ref<GitRepo> getTarballCache()
|
||||
{
|
||||
static auto repoDir = std::filesystem::path(getCacheDir()) / "tarball-cache";
|
||||
|
|
|
@ -74,8 +74,14 @@ struct GitRepo
|
|||
*/
|
||||
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0;
|
||||
|
||||
virtual void smudgeLfs() = 0;
|
||||
|
||||
virtual std::string resolveSubmoduleUrl(const std::string & url) = 0;
|
||||
|
||||
virtual std::string resolveSubmoduleUrl(
|
||||
const std::string & url,
|
||||
const std::string & base) = 0;
|
||||
|
||||
virtual bool hasObject(const Hash & oid) = 0;
|
||||
|
||||
virtual ref<SourceAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
|
||||
|
|
|
@ -184,7 +184,7 @@ struct GitInputScheme : InputScheme
|
|||
for (auto & [name, value] : url.query) {
|
||||
if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys")
|
||||
attrs.emplace(name, value);
|
||||
else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
|
||||
else if (name == "shallow" || name == "submodules" || name == "lfs" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
|
||||
attrs.emplace(name, Explicit<bool> { value == "1" });
|
||||
else
|
||||
url2.query.emplace(name, value);
|
||||
|
@ -209,6 +209,7 @@ struct GitInputScheme : InputScheme
|
|||
"rev",
|
||||
"shallow",
|
||||
"submodules",
|
||||
"lfs",
|
||||
"exportIgnore",
|
||||
"lastModified",
|
||||
"revCount",
|
||||
|
@ -394,6 +395,11 @@ struct GitInputScheme : InputScheme
|
|||
return maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
|
||||
}
|
||||
|
||||
bool getLfsAttr(const Input & input) const
|
||||
{
|
||||
return maybeGetBoolAttr(input.attrs, "lfs").value_or(false);
|
||||
}
|
||||
|
||||
bool getExportIgnoreAttr(const Input & input) const
|
||||
{
|
||||
return maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false);
|
||||
|
@ -671,6 +677,12 @@ struct GitInputScheme : InputScheme
|
|||
}
|
||||
}
|
||||
|
||||
if (getLfsAttr(input)) {
|
||||
warn("lfs attr set on %s", input.to_string());
|
||||
// urlencoded `?lfs=1` param is set,
|
||||
repo->smudgeLfs();
|
||||
}
|
||||
|
||||
assert(!origRev || origRev == rev);
|
||||
if (!getShallowAttr(input))
|
||||
input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount"));
|
||||
|
|
|
@ -73,6 +73,7 @@ headers = files(
|
|||
'fetch-to-store.hh',
|
||||
'filtering-source-accessor.hh',
|
||||
'git-utils.hh',
|
||||
'git-lfs-fetch.hh',
|
||||
'mounted-source-accessor.hh',
|
||||
'fetchers.hh',
|
||||
'registry.hh',
|
||||
|
|
Loading…
Reference in a new issue