From 92f525ecf4ea8a9bd356acd1d3845074b1e5b918 Mon Sep 17 00:00:00 2001
From: Eelco Dolstra <e.dolstra@tudelft.nl>
Date: Wed, 25 Mar 2009 21:05:42 +0000
Subject: [PATCH] * Negative caching, i.e. caching of build failures.  Disabled
 by   default.  This is mostly useful for Hydra.

---
 nix.conf.example            | 16 +++++++++++
 src/libstore/build.cc       | 56 +++++++++++++++++++++++++++++++++----
 src/libstore/local-store.cc | 22 +++++++++++++++
 src/libstore/local-store.hh |  7 +++++
 tests/Makefile.am           |  3 +-
 tests/negative-caching.nix  | 21 ++++++++++++++
 tests/negative-caching.sh   | 22 +++++++++++++++
 7 files changed, 140 insertions(+), 7 deletions(-)
 create mode 100644 tests/negative-caching.nix
 create mode 100644 tests/negative-caching.sh
diff --git a/nix.conf.example b/nix.conf.example
index e7f8adcab..5b6d5b87f 100644
--- a/nix.conf.example
+++ b/nix.conf.example
@@ -154,6 +154,22 @@
 #build-chroot-dirs = /dev /dev/pts /proc
 
 
+### Option `build-cache-failure'
+#
+# If this option is enabled, Nix will do negative caching; that is, it
+# will remember failed builds, and won't attempt to try to build them
+# again if you ask for it.  Negative caching is disabled by default
+# because Nix cannot distinguish between permanent build errors (e.g.,
+# a syntax error in a source file) and transient build errors (e.g., a
+# full disk), as they both cause the builder to return a non-zero exit
+# code.  You can clear the cache by doing `rm -f
+# /nix/var/nix/db/failed/*'.
+#
+# Example:
+#   build-cache-failure = true
+#build-cache-failure = false
+
+
 ### Option `system'
 #
 # This option specifies the canonical Nix system name of the current
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index aa7143299..8c5c6cc36 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -209,6 +209,8 @@ private:
     
 public:
 
+    bool cacheFailure;
+
     LocalStore & store;
 
     Worker(LocalStore & store);
@@ -667,6 +669,9 @@ private:
 
     /* RAII object to delete the chroot directory. */
     boost::shared_ptr<AutoDelete> autoDelChroot;
+
+    /* Whether this is a fixed-output derivation. */
+    bool fixedOutput;
     
     typedef void (DerivationGoal::*GoalState)();
     GoalState state;
@@ -725,6 +730,9 @@ private:
     /* Return the set of (in)valid paths. */
     PathSet checkPathValidity(bool returnValid);
 
+    /* Abort the goal if `path' failed to build. */
+    bool pathFailed(const Path & path);
+
     /* Forcibly kill the child process, if any. */
     void killChild();
 };
@@ -836,6 +844,11 @@ void DerivationGoal::haveDerivation()
         return;
     }
 
+    /* Check whether any output previously failed to build.  If so,
+       don't bother. */
+    foreach (PathSet::iterator, i, invalidOutputs)
+        if (pathFailed(*i)) return;
+        
     /* We are first going to try to create the invalid output paths
        through substitutes.  If that doesn't work, we'll build
        them. */
@@ -1008,6 +1021,12 @@ void DerivationGoal::tryToBuild()
         }
     }
 
+    /* Check again whether any output previously failed to build,
+       because some other process may have tried and failed before we
+       acquired the lock. */
+    foreach (DerivationOutputs::iterator, i, drv.outputs)
+        if (pathFailed(i->second.path)) return;
+
     /* Is the build hook willing to accept this job? */
     usingBuildHook = true;
     switch (tryBuildHook()) {
@@ -1093,9 +1112,7 @@ void DerivationGoal::buildDone()
         /* Some cleanup per path.  We do this here and not in
            computeClosure() for convenience when the build has
            failed. */
-        for (DerivationOutputs::iterator i = drv.outputs.begin(); 
-             i != drv.outputs.end(); ++i)
-        {
+        foreach (DerivationOutputs::iterator, i, drv.outputs) {
             Path path = i->second.path;
 
             if (useChroot && pathExists(chrootRootDir + path)) {
@@ -1147,6 +1164,7 @@ void DerivationGoal::buildDone()
         printMsg(lvlError, e.msg());
         outputLocks.unlock();
         buildUser.release();
+        
         if (printBuildTrace) {
             /* When using a build hook, the hook will return a
                remote build failure using exit code 100.  Anything
@@ -1158,6 +1176,16 @@ void DerivationGoal::buildDone()
                 printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%")
                     % drvPath % drv.outputs["out"].path % 1 % e.msg());
         }
+
+        /* Register the outputs of this build as "failed" so we won't
+           try to build them again (negative caching).  However, don't
+           do this for fixed-output derivations, since they're likely
+           to fail for transient reasons (e.g., fetchurl not being
+           able to access the network). */
+        if (worker.cacheFailure && !fixedOutput)
+            foreach (DerivationOutputs::iterator, i, drv.outputs)
+                worker.store.registerFailedPath(i->second.path);
+        
         amDone(ecFailed);
         return;
     }
@@ -1451,9 +1479,8 @@ void DerivationGoal::startBuilder()
        derivation, tell the builder, so that for instance `fetchurl'
        can skip checking the output.  On older Nixes, this environment
        variable won't be set, so `fetchurl' will do the check. */
-    bool fixedOutput = true;
-    for (DerivationOutputs::iterator i = drv.outputs.begin(); 
-         i != drv.outputs.end(); ++i)
+    fixedOutput = true;
+    foreach (DerivationOutputs::iterator, i, drv.outputs)
         if (i->second.hash == "") fixedOutput = false;
     if (fixedOutput) 
         env["NIX_OUTPUT_CHECKED"] = "1";
@@ -2035,6 +2062,22 @@ PathSet DerivationGoal::checkPathValidity(bool returnValid)
 }
 
 
+bool DerivationGoal::pathFailed(const Path & path)
+{
+    if (!worker.cacheFailure) return false;
+    
+    if (!worker.store.hasPathFailed(path)) return false;
+
+    printMsg(lvlError, format("builder for `%1%' failed previously (cached)") % path);
+    
+    if (printBuildTrace)
+        printMsg(lvlError, format("@ build-failed %1% %2% cached") % drvPath % path);
+    
+    amDone(ecFailed);
+
+    return true;
+}
+
 
 //////////////////////////////////////////////////////////////////////
 
@@ -2392,6 +2435,7 @@ Worker::Worker(LocalStore & store)
     working = true;
     nrChildren = 0;
     lastWokenUp = 0;
+    cacheFailure = queryBoolSetting("build-cache-failure", false);
 }
 
 
diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index fdfc85346..7ab7e4e8e 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -65,6 +65,7 @@ LocalStore::LocalStore()
 
     createDirs(nixDBPath + "/info");
     createDirs(nixDBPath + "/referrer");
+    createDirs(nixDBPath + "/failed");
 
     int curSchema = getSchema();
     if (curSchema > nixSchemaVersion)
@@ -196,6 +197,13 @@ static Path referrersFileFor(const Path & path)
 }
 
 
+static Path failedFileFor(const Path & path)
+{
+    string baseName = baseNameOf(path);
+    return (format("%1%/failed/%2%") % nixDBPath % baseName).str();
+}
+
+
 static Path tmpFileForAtomicUpdate(const Path & path)
 {
     return (format("%1%/.%2%.%3%") % dirOf(path) % getpid() % baseNameOf(path)).str();
@@ -335,6 +343,20 @@ void LocalStore::registerValidPath(const ValidPathInfo & info, bool ignoreValidi
 }
 
 
+void LocalStore::registerFailedPath(const Path & path)
+{
+    /* Write an empty file in the .../failed directory to denote the
+       failure of the builder for `path'. */
+    writeFile(failedFileFor(path), "");
+}
+
+
+bool LocalStore::hasPathFailed(const Path & path)
+{
+    return pathExists(failedFileFor(path));
+}
+
+
 Hash parseHashField(const Path & path, const string & s)
 {
     string::size_type colon = s.find(':');
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index 20a0b45af..1cacfee33 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -144,6 +144,13 @@ public:
 
     void registerValidPaths(const ValidPathInfos & infos);
 
+    /* Register that the build of a derivation with output `path' has
+       failed. */
+    void registerFailedPath(const Path & path);
+
+    /* Query whether `path' previously failed to build. */
+    bool hasPathFailed(const Path & path);
+
 private:
 
     Path schemaPath;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a16051f73..beb7852db 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -7,7 +7,7 @@ TESTS = init.sh hash.sh lang.sh add.sh simple.sh dependencies.sh \
   fallback.sh nix-push.sh gc.sh gc-concurrent.sh verify.sh nix-pull.sh \
   referrers.sh user-envs.sh logging.sh nix-build.sh misc.sh fixed.sh \
   gc-runtime.sh install-package.sh check-refs.sh filter-source.sh \
-  remote-store.sh export.sh export-graph.sh
+  remote-store.sh export.sh export-graph.sh negative-caching.sh
 
 XFAIL_TESTS =
 
@@ -30,5 +30,6 @@ EXTRA_DIST = $(TESTS) \
   check-refs.nix \
   filter-source.nix \
   export-graph.nix \
+  negative-caching.nix \
   $(wildcard lang/*.nix) $(wildcard lang/*.exp) $(wildcard lang/*.exp.xml) $(wildcard lang/*.flags) \
   common.sh.in
diff --git a/tests/negative-caching.nix b/tests/negative-caching.nix
new file mode 100644
index 000000000..10df67a74
--- /dev/null
+++ b/tests/negative-caching.nix
@@ -0,0 +1,21 @@
+with import ./config.nix;
+
+rec {
+
+  fail = mkDerivation {
+    name = "fail";
+    builder = builtins.toFile "builder.sh" "echo FAIL; exit 1";
+  };
+
+  succeed = mkDerivation {
+    name = "succeed";
+    builder = builtins.toFile "builder.sh" "echo SUCCEED; mkdir $out";
+  };
+
+  depOnFail = mkDerivation {
+    name = "dep-on-fail";
+    builder = builtins.toFile "builder.sh" "echo URGH; mkdir $out";
+    inputs = [fail succeed];
+  };
+
+}
diff --git a/tests/negative-caching.sh b/tests/negative-caching.sh
new file mode 100644
index 000000000..e618e9443
--- /dev/null
+++ b/tests/negative-caching.sh
@@ -0,0 +1,22 @@
+source common.sh
+
+clearStore
+
+set +e
+
+opts="--option build-cache-failure true --print-build-trace"
+
+# This build should fail, and the failure should be cached.
+log=$($nixbuild $opts negative-caching.nix -A fail 2>&1) && fail "should fail"
+echo "$log" | grep -q "@ build-failed" || fail "no build-failed trace"
+
+# Do it again.  The build shouldn't be tried again.
+log=$($nixbuild $opts negative-caching.nix -A fail 2>&1) && fail "should fail"
+echo "$log" | grep -q "FAIL" && fail "failed build not cached"
+echo "$log" | grep -q "@ build-failed .* cached" || fail "trace doesn't say cached"
+
+# Check that --keep-going works properly with cached failures.
+log=$($nixbuild $opts --keep-going negative-caching.nix -A depOnFail 2>&1) && fail "should fail"
+echo "$log" | grep -q "FAIL" && fail "failed build not cached (2)"
+echo "$log" | grep -q "@ build-failed .* cached" || fail "trace doesn't say cached (2)"
+echo "$log" | grep -q "@ build-succeeded .*-succeed" || fail "didn't keep going"