diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 97554e9cf..1f79a8d2d 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -16,7 +16,7 @@ #include "machines.hh" #include "daemon.hh" #include "worker-protocol.hh" -#include "cgroup.hh" +#include "user-lock.hh" #include #include @@ -504,154 +504,6 @@ void handleDiffHook( } } -////////////////////////////////////////////////////////////////////// - - -class UserLock -{ -private: - Path fnUserLock; - AutoCloseFD fdUserLock; - - bool isEnabled = false; - uid_t uid = 0; - gid_t gid = 0; - std::vector supplementaryGIDs; - -public: - UserLock(); - - void kill(); - - uid_t getUID() { assert(uid); return uid; } - gid_t getGID() { assert(gid); return gid; } - uint32_t getIDCount() { return settings.idsPerBuild; } - std::vector getSupplementaryGIDs() { return supplementaryGIDs; } - - bool findFreeUser(); - - bool enabled() { return isEnabled; } - -}; - - -UserLock::UserLock() -{ -#if 0 - assert(settings.buildUsersGroup != ""); - createDirs(settings.nixStateDir + "/userpool"); -#endif -} - -bool UserLock::findFreeUser() { - if (enabled()) return true; - -#if 0 - /* Get the members of the build-users-group. */ - struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); - if (!gr) - throw Error("the group '%1%' specified in 'build-users-group' does not exist", - settings.buildUsersGroup); - gid = gr->gr_gid; - - /* Copy the result of getgrnam. */ - Strings users; - for (char * * p = gr->gr_mem; *p; ++p) { - debug("found build user '%1%'", *p); - users.push_back(*p); - } - - if (users.empty()) - throw Error("the build users group '%1%' has no members", - settings.buildUsersGroup); - - /* Find a user account that isn't currently in use for another - build. */ - for (auto & i : users) { - debug("trying user '%1%'", i); - - struct passwd * pw = getpwnam(i.c_str()); - if (!pw) - throw Error("the user '%1%' in the group '%2%' does not exist", - i, settings.buildUsersGroup); - - - fnUserLock = (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str(); - - AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); - if (!fd) - throw SysError("opening user lock '%1%'", fnUserLock); - - if (lockFile(fd.get(), ltWrite, false)) { - fdUserLock = std::move(fd); - user = i; - uid = pw->pw_uid; - - /* Sanity check... */ - if (uid == getuid() || uid == geteuid()) - throw Error("the Nix user should not be a member of '%1%'", - settings.buildUsersGroup); - -#if __linux__ - /* Get the list of supplementary groups of this build user. This - is usually either empty or contains a group such as "kvm". */ - supplementaryGIDs.resize(10); - int ngroups = supplementaryGIDs.size(); - int err = getgrouplist(pw->pw_name, pw->pw_gid, - supplementaryGIDs.data(), &ngroups); - if (err == -1) - throw Error("failed to get list of supplementary groups for '%1%'", pw->pw_name); - - supplementaryGIDs.resize(ngroups); -#endif - - isEnabled = true; - return true; - } - } - - return false; -#endif - - assert(settings.startId > 0); - assert(settings.startId % settings.idsPerBuild == 0); - assert(settings.uidCount % settings.idsPerBuild == 0); - assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); - - // FIXME: check whether the id range overlaps any known users - - size_t nrSlots = settings.uidCount / settings.idsPerBuild; - - for (size_t i = 0; i < nrSlots; i++) { - debug("trying user slot '%d'", i); - - createDirs(settings.nixStateDir + "/userpool"); - - fnUserLock = fmt("%s/userpool/slot-%d", settings.nixStateDir, i); - - AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); - if (!fd) - throw SysError("opening user lock '%1%'", fnUserLock); - - if (lockFile(fd.get(), ltWrite, false)) { - fdUserLock = std::move(fd); - uid = settings.startId + i * settings.idsPerBuild; - gid = settings.startId + i * settings.idsPerBuild; - return true; - } - } - - return false; -} - -void UserLock::kill() -{ - // FIXME: use a cgroup to kill all processes in the build? -#if 0 - killUser(uid); -#endif -} - ////////////////////////////////////////////////////////////////////// @@ -840,6 +692,13 @@ private: Path chrootRootDir; + /* Whether to give the build more than 1 UID. */ + bool useUidRange = false; + + /* Whether to make the 'systemd' cgroup controller available to + the build. */ + bool useSystemdCgroup = false; + /* RAII object to delete the chroot directory. */ std::shared_ptr autoDelChroot; @@ -896,8 +755,8 @@ private: result. */ std::map prevInfos; - const uid_t sandboxUid = 1000; - const gid_t sandboxGid = 100; + uid_t sandboxUid = -1; + gid_t sandboxGid = -1; const static Path homeDir; @@ -1445,6 +1304,7 @@ void DerivationGoal::inputsRealised() result = BuildResult(); } + void DerivationGoal::started() { auto msg = fmt( buildMode == bmRepair ? "repairing outputs of '%s'" : @@ -1459,6 +1319,7 @@ void DerivationGoal::started() { worker.updateProgress(); } + void DerivationGoal::tryToBuild() { trace("trying to build"); @@ -1556,25 +1417,28 @@ void DerivationGoal::tryToBuild() worker.wakeUp(shared_from_this()); } + void DerivationGoal::tryLocalBuild() { /* If `build-users-group' is not empty, then we have to build as one of the members of that group. */ - if ((settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0) { + static bool useBuildUsers = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0; + if (useBuildUsers) { #if defined(__linux__) || defined(__APPLE__) - if (!buildUser) buildUser = std::make_unique(); + if (!buildUser) + buildUser = acquireUserLock(); - if (buildUser->findFreeUser()) { - /* Make sure that no other processes are executing under this - uid. */ - buildUser->kill(); - } else { + if (!buildUser) { if (!actLock) actLock = std::make_unique(*logger, lvlWarn, actBuildWaiting, fmt("waiting for UID to build '%s'", yellowtxt(worker.store.printStorePath(drvPath)))); worker.waitForAWhile(shared_from_this()); return; } + + /* Make sure that no other processes are executing under this + uid. */ + buildUser->kill(); #else /* Don't know how to block the creation of setuid/setgid binaries on this platform. */ @@ -2087,6 +1951,9 @@ void DerivationGoal::startBuilder() } } + useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range"); + useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("systemd-cgroup"); + if (useChroot) { /* Allow a user-configurable set of directories from the @@ -2166,7 +2033,7 @@ void DerivationGoal::startBuilder() printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); - if (mkdir(chrootRootDir.c_str(), 0755) == -1) + if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1) throw SysError("cannot create '%1%'", chrootRootDir); // FIXME: only make root writable for user namespace builds. @@ -2186,6 +2053,12 @@ void DerivationGoal::startBuilder() createDirs(chrootRootDir + "/etc"); chownToBuilder(chrootRootDir + "/etc"); + if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536)) + throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name); + + sandboxUid = useUidRange ? 0 : 1000; + sandboxGid = useUidRange ? 0 : 100; + writeFile(chrootRootDir + "/etc/passwd", fmt( "root:x:0:0:Nix build user:%3%:/noshell\n" "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n" @@ -2238,12 +2111,32 @@ void DerivationGoal::startBuilder() for (auto & i : drv->outputs) dirsInChroot.erase(worker.store.printStorePath(i.second.path)); -#elif __APPLE__ - /* We don't really have any parent prep work to do (yet?) - All work happens in the child, instead. */ + if (useSystemdCgroup) { + settings.requireExperimentalFeature("systemd-cgroup"); + std::optional cgroup; + if (!buildUser || !(cgroup = buildUser->getCgroup())) + throw Error("feature 'systemd-cgroup' requires 'auto-allocate-uids = true' in nix.conf"); + chownToBuilder(*cgroup); + chownToBuilder(*cgroup + "/cgroup.procs"); + } + #else - throw Error("sandboxing builds is not supported on this platform"); + if (useUidRange) + throw Error("feature 'uid-range' is not supported on this platform"); + if (useSystemdCgroup) + throw Error("feature 'systemd-cgroup' is not supported on this platform"); + #if __APPLE__ + /* We don't really have any parent prep work to do (yet?) + All work happens in the child, instead. */ + #else + throw Error("sandboxing builds is not supported on this platform"); + #endif #endif + } else { + if (useUidRange) + throw Error("feature 'uid-range' is only supported in sandboxed builds"); + if (useSystemdCgroup) + throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds"); } if (needsHashRewrite()) { @@ -2375,31 +2268,6 @@ void DerivationGoal::startBuilder() #if __linux__ if (useChroot) { - /* Create a systemd cgroup since that's the minimum required - by systemd-nspawn. */ - // FIXME: do we want to use the parent cgroup? We should - // always use the same cgroup regardless of whether we're the - // daemon or run from a user session via sudo. - auto ourCgroups = getCgroups("/proc/self/cgroup"); - auto systemdCgroup = ourCgroups["systemd"]; - if (systemdCgroup == "") - throw Error("'systemd' cgroup does not exist"); - - auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); - - if (!pathExists(hostCgroup)) - throw Error("expected cgroup directory '%s'", hostCgroup); - - auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); - - destroyCgroup(childCgroup); - - if (mkdir(childCgroup.c_str(), 0755) == -1) - throw SysError("creating cgroup '%s'", childCgroup); - - chownToBuilder(childCgroup); - chownToBuilder(childCgroup + "/cgroup.procs"); - /* Set up private namespaces for the build: - The PID namespace causes the build to start as PID 1. @@ -2508,15 +2376,16 @@ void DerivationGoal::startBuilder() the calling user (if build users are disabled). */ uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); - uint32_t nrIds = settings.idsPerBuild; // FIXME + uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1; writeFile("/proc/" + std::to_string(pid) + "/uid_map", - fmt("%d %d %d", /* sandboxUid */ 0, hostUid, nrIds)); + fmt("%d %d %d", sandboxUid, hostUid, nrIds)); - //writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + if (!useUidRange) + writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", - fmt("%d %d %d", /* sandboxGid */ 0, hostGid, nrIds)); + fmt("%d %d %d", sandboxGid, hostGid, nrIds)); /* Save the mount namespace of the child. We have to do this *before* the child does a chroot. */ @@ -2525,7 +2394,10 @@ void DerivationGoal::startBuilder() throw SysError("getting sandbox mount namespace"); /* Move the child into its own cgroup. */ - writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + if (buildUser) { + if (auto cgroup = buildUser->getCgroup()) + writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + } /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); @@ -3361,7 +3233,7 @@ void DerivationGoal::runChild() /* Unshare the cgroup namespace. This means /proc/self/cgroup will show the child's cgroup as '/' rather than whatever it is in the parent. */ - if (unshare(CLONE_NEWCGROUP) == -1) + if (useSystemdCgroup && unshare(CLONE_NEWCGROUP) == -1) throw SysError("unsharing cgroup namespace"); /* Do the chroot(). */ @@ -3386,16 +3258,10 @@ void DerivationGoal::runChild() /* Switch to the sandbox uid/gid in the user namespace, which corresponds to the build user or calling user in the parent namespace. */ -#if 0 if (setgid(sandboxGid) == -1) throw SysError("setgid failed"); if (setuid(sandboxUid) == -1) throw SysError("setuid failed"); -#endif - if (setgid(0) == -1) - throw SysError("setgid failed"); - if (setuid(0) == -1) - throw SysError("setuid failed"); setUser = false; } @@ -3789,7 +3655,7 @@ void DerivationGoal::registerOutputs() something like that. */ canonicalisePathMetaData( actualPath, - buildUser ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) : std::nullopt, + buildUser ? std::optional(buildUser->getUIDRange()) : std::nullopt, inodesSeen); /* FIXME: this is in-memory. */ @@ -3866,7 +3732,7 @@ void DerivationGoal::registerOutputs() all files are owned by the build user, if applicable. */ canonicalisePathMetaData(actualPath, buildUser && !rewritten - ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) + ? std::optional(buildUser->getUIDRange()) : std::nullopt, inodesSeen); diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 887facdca..9e5e937df 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -4,6 +4,7 @@ #include "util.hh" #include +#include #include @@ -19,7 +20,7 @@ std::map getCgroups(const Path & cgroupFile) if (!std::regex_match(line, match, regex)) throw Error("invalid line '%s' in '%s'", line, cgroupFile); - std::string name = hasPrefix(match[2], "name=") ? std::string(match[2], 5) : match[2]; + std::string name = hasPrefix(std::string(match[2]), "name=") ? std::string(match[2], 5) : match[2]; cgroups.insert_or_assign(name, match[3]); } @@ -28,6 +29,8 @@ std::map getCgroups(const Path & cgroupFile) void destroyCgroup(const Path & cgroup) { + if (!pathExists(cgroup)) return; + for (auto & entry : readDirectory(cgroup)) { if (entry.type != DT_DIR) continue; destroyCgroup(cgroup + "/" + entry.name); @@ -35,6 +38,8 @@ void destroyCgroup(const Path & cgroup) int round = 1; + std::unordered_set pidsShown; + while (true) { auto pids = tokenizeString>(readFile(cgroup + "/cgroup.procs")); @@ -46,13 +51,23 @@ void destroyCgroup(const Path & cgroup) for (auto & pid_s : pids) { pid_t pid; if (!string2Int(pid_s, pid)) throw Error("invalid pid '%s'", pid); + if (pidsShown.insert(pid).second) { + try { + auto cmdline = readFile(fmt("/proc/%d/cmdline", pid)); + using namespace std::string_literals; + warn("killing stray builder process %d (%s)...", + pid, trim(replaceStrings(cmdline, "\0"s, " "))); + } catch (SysError &) { + } + } // FIXME: pid wraparound if (kill(pid, SIGKILL) == -1 && errno != ESRCH) throw SysError("killing member %d of cgroup '%s'", pid, cgroup); } auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10))); - printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); + if (sleep.count() > 100) + printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); std::this_thread::sleep_for(sleep); round++; } diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 89db072b0..5cf73c7b4 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -149,10 +149,13 @@ public: "The Unix group that contains the build users."}; #if __linux__ + Setting autoAllocateUids{this, false, "auto-allocate-uids", + "Whether to allocate UIDs for builders automatically."}; + const uint32_t idsPerBuild = 1 << 16; Setting startId{this, 872415232, "start-id", - "The first UID and GID to use for dynamic ID allocation. (0 means disable.)"}; + "The first UID and GID to use for dynamic ID allocation."}; Setting uidCount{this, idsPerBuild * 128, "id-count", "The number of UIDs/GIDs to use for dynamic ID allocation."}; diff --git a/src/libstore/user-lock.cc b/src/libstore/user-lock.cc new file mode 100644 index 000000000..8a09df4d1 --- /dev/null +++ b/src/libstore/user-lock.cc @@ -0,0 +1,212 @@ +#include "user-lock.hh" +#include "globals.hh" +#include "pathlocks.hh" +#include "cgroup.hh" + +namespace nix { + +struct SimpleUserLock : UserLock +{ + AutoCloseFD fdUserLock; + uid_t uid; + gid_t gid; + std::vector supplementaryGIDs; + + void kill() override + { + killUser(uid); + } + + std::pair getUIDRange() override + { + assert(uid); + return {uid, uid}; + } + + gid_t getGID() override { assert(gid); return gid; } + + std::vector getSupplementaryGIDs() override { return supplementaryGIDs; } + + static std::unique_ptr acquire() + { + assert(settings.buildUsersGroup != ""); + createDirs(settings.nixStateDir + "/userpool"); + + /* Get the members of the build-users-group. */ + struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (!gr) + throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup); + + /* Copy the result of getgrnam. */ + Strings users; + for (char * * p = gr->gr_mem; *p; ++p) { + debug("found build user '%s'", *p); + users.push_back(*p); + } + + if (users.empty()) + throw Error("the build users group '%s' has no members", settings.buildUsersGroup); + + /* Find a user account that isn't currently in use for another + build. */ + for (auto & i : users) { + debug("trying user '%s'", i); + + struct passwd * pw = getpwnam(i.c_str()); + if (!pw) + throw Error("the user '%s' in the group '%s' does not exist", i, settings.buildUsersGroup); + + auto fnUserLock = fmt("%s/userpool/%s", settings.nixStateDir,pw->pw_uid); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + auto lock = std::make_unique(); + + lock->fdUserLock = std::move(fd); + lock->uid = pw->pw_uid; + lock->gid = gr->gr_gid; + + /* Sanity check... */ + if (lock->uid == getuid() || lock->uid == geteuid()) + throw Error("the Nix user should not be a member of '%s'", settings.buildUsersGroup); + + #if __linux__ + /* Get the list of supplementary groups of this build + user. This is usually either empty or contains a + group such as "kvm". */ + lock->supplementaryGIDs.resize(10); + int ngroups = lock->supplementaryGIDs.size(); + int err = getgrouplist(pw->pw_name, pw->pw_gid, + lock->supplementaryGIDs.data(), &ngroups); + if (err == -1) + throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name); + + lock->supplementaryGIDs.resize(ngroups); + #endif + + return lock; + } + } + + return nullptr; + } +}; + +#if __linux__ +struct CgroupUserLock : UserLock +{ + AutoCloseFD fdUserLock; + uid_t uid; + + void kill() override + { + if (cgroup) { + destroyCgroup(*cgroup); + cgroup.reset(); + } + } + + std::pair getUIDRange() override + { + assert(uid); + return {uid, uid + settings.idsPerBuild - 1}; + } + + gid_t getGID() override + { + // We use the same GID ranges as for the UIDs. + assert(uid); + return uid; + } + + std::vector getSupplementaryGIDs() override { return {}; } // FIXME + + static std::unique_ptr acquire() + { + settings.requireExperimentalFeature("auto-allocate-uids"); + assert(settings.startId > 0); + assert(settings.startId % settings.idsPerBuild == 0); + assert(settings.uidCount % settings.idsPerBuild == 0); + assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); + + // FIXME: check whether the id range overlaps any known users + + createDirs(settings.nixStateDir + "/userpool2"); + + size_t nrSlots = settings.uidCount / settings.idsPerBuild; + + for (size_t i = 0; i < nrSlots; i++) { + debug("trying user slot '%d'", i); + + createDirs(settings.nixStateDir + "/userpool2"); + + auto fnUserLock = fmt("%s/userpool2/slot-%d", settings.nixStateDir, i); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + auto lock = std::make_unique(); + lock->fdUserLock = std::move(fd); + lock->uid = settings.startId + i * settings.idsPerBuild; + auto s = drainFD(lock->fdUserLock.get()); + if (s != "") lock->cgroup = s; + return lock; + } + } + + return nullptr; + } + + std::optional cgroup; + + std::optional getCgroup() override + { + if (!cgroup) { + /* Create a systemd cgroup since that's the minimum + required by systemd-nspawn. */ + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto systemdCgroup = ourCgroups["systemd"]; + if (systemdCgroup == "") + throw Error("'systemd' cgroup does not exist"); + + auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); + + if (!pathExists(hostCgroup)) + throw Error("expected cgroup directory '%s'", hostCgroup); + + cgroup = fmt("%s/nix-%d", hostCgroup, uid); + + destroyCgroup(*cgroup); + + if (mkdir(cgroup->c_str(), 0755) == -1) + throw SysError("creating cgroup '%s'", *cgroup); + + /* Record the cgroup in the lock file. This ensures that + if we subsequently get executed under a different parent + cgroup, we kill the previous cgroup first. */ + if (ftruncate(fdUserLock.get(), 0) == -1) + throw Error("truncating user lock"); + writeFull(fdUserLock.get(), *cgroup); + } + + return cgroup; + }; +}; +#endif + +std::unique_ptr acquireUserLock() +{ + #if __linux__ + if (settings.autoAllocateUids) + return CgroupUserLock::acquire(); + else + #endif + return SimpleUserLock::acquire(); +} + +} diff --git a/src/libstore/user-lock.hh b/src/libstore/user-lock.hh new file mode 100644 index 000000000..88d068689 --- /dev/null +++ b/src/libstore/user-lock.hh @@ -0,0 +1,39 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +struct UserLock +{ + virtual ~UserLock() { } + + /* Get the first and last UID. */ + virtual std::pair getUIDRange() = 0; + + /* Get the first UID. */ + uid_t getUID() + { + return getUIDRange().first; + } + + uid_t getUIDCount() + { + return getUIDRange().second - getUIDRange().first + 1; + } + + virtual gid_t getGID() = 0; + + virtual std::vector getSupplementaryGIDs() = 0; + + /* Kill any processes currently executing as this user. */ + virtual void kill() = 0; + + virtual std::optional getCgroup() { return {}; }; +}; + +/* Acquire a user lock. Note that this may return nullptr if no user + is available. */ +std::unique_ptr acquireUserLock(); + +}