diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index d06f1f87b..d17401f27 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -12,8 +12,6 @@
 #include <brotli/decode.h>
 #include <brotli/encode.h>
 
-#include <zlib.h>
-
 namespace nix {
 
 static const int COMPRESSION_LEVEL_DEFAULT = -1;
@@ -40,20 +38,26 @@ struct ArchiveDecompressionSource : Source
 {
     std::unique_ptr<TarArchive> archive = 0;
     Source & src;
-    ArchiveDecompressionSource(Source & src) : src(src) {}
+    std::optional<std::string> compressionMethod;
+    ArchiveDecompressionSource(Source & src, std::optional<std::string> compressionMethod = std::nullopt)
+        : src(src)
+        , compressionMethod(std::move(compressionMethod))
+    {
+    }
     ~ArchiveDecompressionSource() override {}
-    size_t read(char * data, size_t len) override {
+    size_t read(char * data, size_t len) override
+    {
         struct archive_entry * ae;
         if (!archive) {
-            archive = std::make_unique<TarArchive>(src, true);
-            this->archive->check(archive_read_next_header(this->archive->archive, &ae),
-                "failed to read header (%s)");
+            archive = std::make_unique<TarArchive>(src, /*raw*/ true, compressionMethod);
+            this->archive->check(archive_read_next_header(this->archive->archive, &ae), "failed to read header (%s)");
             if (archive_filter_count(this->archive->archive) < 2) {
                 throw CompressionError("input compression not recognized");
             }
         }
         ssize_t result = archive_read_data(this->archive->archive, data, len);
-        if (result > 0) return result;
+        if (result > 0)
+            return result;
         if (result == 0) {
             throw EndOfFile("reached end of compressed file");
         }
@@ -67,16 +71,19 @@ struct ArchiveCompressionSink : CompressionSink
     Sink & nextSink;
     struct archive * archive;
 
-    ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink)
+    ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel, int level = COMPRESSION_LEVEL_DEFAULT)
+        : nextSink(nextSink)
     {
         archive = archive_write_new();
-        if (!archive) throw Error("failed to initialize libarchive");
+        if (!archive)
+            throw Error("failed to initialize libarchive");
         check(archive_write_add_filter_by_name(archive, format.c_str()), "couldn't initialize compression (%s)");
         check(archive_write_set_format_raw(archive));
         if (parallel)
             check(archive_write_set_filter_option(archive, format.c_str(), "threads", "0"));
         if (level != COMPRESSION_LEVEL_DEFAULT)
-            check(archive_write_set_filter_option(archive, format.c_str(), "compression-level", std::to_string(level).c_str()));
+            check(archive_write_set_filter_option(
+                archive, format.c_str(), "compression-level", std::to_string(level).c_str()));
         // disable internal buffering
         check(archive_write_set_bytes_per_block(archive, 0));
         // disable output padding
@@ -86,7 +93,8 @@ struct ArchiveCompressionSink : CompressionSink
 
     ~ArchiveCompressionSink() override
     {
-        if (archive) archive_write_free(archive);
+        if (archive)
+            archive_write_free(archive);
     }
 
     void finish() override
@@ -106,7 +114,8 @@ struct ArchiveCompressionSink : CompressionSink
     void writeUnbuffered(std::string_view data) override
     {
         ssize_t result = archive_write_data(archive, data.data(), data.length());
-        if (result <= 0) check(result);
+        if (result <= 0)
+            check(result);
     }
 
 private:
@@ -130,13 +139,20 @@
 struct NoneSink : CompressionSink
 {
     Sink & nextSink;
-    NoneSink(Sink & nextSink, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink)
+    NoneSink(Sink & nextSink, int level = COMPRESSION_LEVEL_DEFAULT)
+        : nextSink(nextSink)
     {
         if (level != COMPRESSION_LEVEL_DEFAULT)
             warn("requested compression level '%d' not supported by compression method 'none'", level);
     }
-    void finish() override { flush(); }
-    void writeUnbuffered(std::string_view data) override { nextSink(data); }
+    void finish() override
+    {
+        flush();
+    }
+    void writeUnbuffered(std::string_view data) override
+    {
+        nextSink(data);
+    }
 };
 
 struct BrotliDecompressionSink : ChunkedCompressionSink
@@ -145,7 +161,8 @@
     BrotliDecoderState * state;
     bool finished = false;
 
-    BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+    BrotliDecompressionSink(Sink & nextSink)
+        : nextSink(nextSink)
     {
         state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
         if (!state)
@@ -173,10 +190,7 @@ struct BrotliDecompressionSink : ChunkedCompressionSink
         while (!finished && (!data.data() || avail_in)) {
             checkInterrupt();
 
-            if (!BrotliDecoderDecompressStream(state,
-                    &avail_in, &next_in,
-                    &avail_out, &next_out,
-                    nullptr))
+            if (!BrotliDecoderDecompressStream(state, &avail_in, &next_in, &avail_out, &next_out, nullptr))
                 throw CompressionError("error while decompressing brotli file");
 
             if (avail_out < sizeof(outbuf) || avail_in == 0) {
@@ -206,8 +220,8 @@ std::unique_ptr<FinishSink> makeDecompressionSink(const std::string & method, Si
     else if (method == "br")
         return std::make_unique<BrotliDecompressionSink>(nextSink);
     else
-        return sourceToSink([&](Source & source) {
-            auto decompressionSource = std::make_unique<ArchiveDecompressionSource>(source);
+        return sourceToSink([method, &nextSink](Source & source) {
+            auto decompressionSource = std::make_unique<ArchiveDecompressionSource>(source, method);
             decompressionSource->drainInto(nextSink);
         });
 }
@@ -219,7 +233,8 @@ struct BrotliCompressionSink : ChunkedCompressionSink
     BrotliEncoderState * state;
     bool finished = false;
 
-    BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
+    BrotliCompressionSink(Sink & nextSink)
+        : nextSink(nextSink)
     {
         state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
         if (!state)
@@ -247,11 +262,9 @@ struct BrotliCompressionSink : ChunkedCompressionSink
         while (!finished && (!data.data() || avail_in)) {
             checkInterrupt();
 
-            if (!BrotliEncoderCompressStream(state,
-                    data.data() ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
-                    &avail_in, &next_in,
-                    &avail_out, &next_out,
-                    nullptr))
+            if (!BrotliEncoderCompressStream(
+                    state, data.data() ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, &avail_in, &next_in,
+                    &avail_out, &next_out, nullptr))
                 throw CompressionError("error while compressing brotli compression");
 
             if (avail_out < sizeof(outbuf) || avail_in == 0) {
@@ -267,9 +280,8 @@
 
 ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel, int level)
 {
-    std::vector<std::string> la_supports = {
-        "bzip2", "compress", "grzip", "gzip", "lrzip", "lz4", "lzip", "lzma", "lzop", "xz", "zstd"
-    };
+    std::vector<std::string> la_supports = {"bzip2", "compress", "grzip", "gzip", "lrzip", "lz4",
+                                            "lzip",  "lzma",     "lzop",  "xz",   "zstd"};
     if (std::find(la_supports.begin(), la_supports.end(), method) != la_supports.end()) {
         return make_ref<ArchiveCompressionSink>(nextSink, method, parallel, level);
    }
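
The behavioral change in this file is concentrated in `ArchiveDecompressionSource` and `makeDecompressionSink`: when the caller names a concrete method such as `"gzip"`, the underlying `TarArchive` now enables only that one libarchive filter instead of calling `archive_read_support_filter_all`, so exactly one encoding layer is stripped and an inner compressed payload survives. Below is a minimal standalone sketch of that idea — not part of the patch, just an illustration against plain libarchive, reading from stdin:

```cpp
// Sketch: strip exactly one gzip layer from stdin, leaving any inner
// compression (e.g. a zstd archive delivered with Content-Encoding: gzip)
// intact.
#include <archive.h>
#include <archive_entry.h>
#include <cstdio>

int main()
{
    struct archive * a = archive_read_new();
    // Raw mode: the input is a single (possibly filtered) byte stream,
    // not a tar/cpio/... archive.
    archive_read_support_format_raw(a);
    archive_read_support_format_empty(a);
    // The crux of the fix: enable one named filter instead of
    // archive_read_support_filter_all(), so libarchive cannot stack a second
    // decompression layer (zstd, xz, ...) on top of the gzip one.
    archive_read_support_filter_gzip(a);
    // A null filename tells libarchive to read standard input.
    if (archive_read_open_filename(a, nullptr, 65536) != ARCHIVE_OK)
        return 1;
    struct archive_entry * ae;
    if (archive_read_next_header(a, &ae) != ARCHIVE_OK)
        return 1;
    // Filter count 1 means only the raw pass-through, i.e. the input
    // compression was not recognized -- the same check as in the patch.
    if (archive_filter_count(a) < 2)
        return 1;
    char buf[65536];
    while (true) {
        auto n = archive_read_data(a, buf, sizeof buf);
        if (n <= 0)
            break;
        fwrite(buf, 1, (size_t) n, stdout);
    }
    archive_read_free(a);
    return 0;
}
```

With `filter_all`, feeding this program a gzipped zstd file would transparently undo both layers; with the pinned filter it stops after gzip, which is precisely the over-decompression the patch fixes.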
diff --git a/src/libutil/compression.hh b/src/libutil/compression.hh
index 4e53a7b3c..e0c531b1f 100644
--- a/src/libutil/compression.hh
+++ b/src/libutil/compression.hh
@@ -11,7 +11,7 @@ namespace nix {
 
 struct CompressionSink : BufferedSink, FinishSink
 {
-    using BufferedSink::operator ();
+    using BufferedSink::operator();
     using BufferedSink::writeUnbuffered;
     using FinishSink::finish;
 };
@@ -22,7 +22,8 @@ std::unique_ptr<FinishSink> makeDecompressionSink(const std::string & method, Si
 
 std::string compress(const std::string & method, std::string_view in, const bool parallel = false, int level = -1);
 
-ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false, int level = -1);
+ref<CompressionSink>
+makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false, int level = -1);
 
 MakeError(UnknownCompressionMethod, Error);
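
The header changes are purely cosmetic, but it is the right place to recall how the API is driven. A hedged usage sketch — `StringSink` and its `s` member are assumed from `serialise.hh`, and the function name is illustrative:

```cpp
#include "compression.hh"
#include "serialise.hh"

// Decode exactly one gzip layer from `wire`, e.g. an HTTP response body
// received with `Content-Encoding: gzip`. If the payload underneath is a
// zstd archive, it now comes out still zstd-compressed.
std::string decodeContentEncoding(std::string_view wire)
{
    nix::StringSink plain;
    auto sink = nix::makeDecompressionSink("gzip", plain);
    (*sink)(wire);
    sink->finish();
    return plain.s;
}
```

`compress` and `makeCompressionSink` mirror this on the write side, with the libarchive-backed methods listed in `la_supports` above.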
diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc
index 3bb6694f8..6bb2bd2f3 100644
--- a/src/libutil/tarfile.cc
+++ b/src/libutil/tarfile.cc
@@ -1,18 +1,21 @@
 #include <archive.h>
 #include <archive_entry.h>
 
+#include "finally.hh"
 #include "serialise.hh"
 #include "tarfile.hh"
 #include "file-system.hh"
 
 namespace nix {
 
-static int callback_open(struct archive *, void * self)
+namespace {
+
+int callback_open(struct archive *, void * self)
 {
     return ARCHIVE_OK;
 }
 
-static ssize_t callback_read(struct archive * archive, void * _self, const void * * buffer)
+ssize_t callback_read(struct archive * archive, void * _self, const void ** buffer)
 {
     auto self = (TarArchive *) _self;
     *buffer = self->buffer.data();
@@ -27,41 +30,71 @@ static ssize_t callback_read(struct archive * archive, void * _self, const void
     }
 }
 
-static int callback_close(struct archive *, void * self)
+int callback_close(struct archive *, void * self)
 {
     return ARCHIVE_OK;
 }
 
-void TarArchive::check(int err, const std::string & reason)
+void checkLibArchive(archive * archive, int err, const std::string & reason)
 {
     if (err == ARCHIVE_EOF)
         throw EndOfFile("reached end of archive");
     else if (err != ARCHIVE_OK)
-        throw Error(reason, archive_error_string(this->archive));
+        throw Error(reason, archive_error_string(archive));
 }
 
-TarArchive::TarArchive(Source & source, bool raw) : buffer(65536)
+constexpr auto defaultBufferSize = std::size_t{65536};
+}
+
+void TarArchive::check(int err, const std::string & reason)
 {
-    this->archive = archive_read_new();
-    this->source = &source;
+    checkLibArchive(archive, err, reason);
+}
+
+/// @brief Get the libarchive filter code for a filter name.
+///
+/// libarchive does not provide a read-side counterpart to
+/// archive_write_add_filter_by_name, so we resort to this kludge: convert the
+/// method name to a code through a scratch write archive, then hand the code
+/// to archive_read_support_filter_by_code. Arguably this is still better than
+/// hand-rolling a name-to-code mapping that libarchive already maintains.
+int getArchiveFilterCodeByName(const std::string & method)
+{
+    auto * ar = archive_write_new();
+    auto cleanup = Finally{[&ar]() { checkLibArchive(ar, archive_write_close(ar), "failed to close archive: %s"); }};
+    auto err = archive_write_add_filter_by_name(ar, method.c_str());
+    checkLibArchive(ar, err, "failed to get libarchive filter by name: %s");
+    auto code = archive_filter_code(ar, 0);
+    return code;
+}
+
+TarArchive::TarArchive(Source & source, bool raw, std::optional<std::string> compression_method)
+    : archive{archive_read_new()}
+    , source{&source}
+    , buffer(defaultBufferSize)
+{
+    if (!compression_method) {
+        archive_read_support_filter_all(archive);
+    } else {
+        archive_read_support_filter_by_code(archive, getArchiveFilterCodeByName(*compression_method));
+    }
+
     if (!raw) {
-        archive_read_support_filter_all(archive);
         archive_read_support_format_all(archive);
     } else {
-        archive_read_support_filter_all(archive);
         archive_read_support_format_raw(archive);
         archive_read_support_format_empty(archive);
     }
+
     archive_read_set_option(archive, NULL, "mac-ext", NULL);
-    check(archive_read_open(archive, (void *)this, callback_open, callback_read, callback_close), "Failed to open archive (%s)");
+    check(
+        archive_read_open(archive, (void *) this, callback_open, callback_read, callback_close),
+        "Failed to open archive (%s)");
 }
 
-
 TarArchive::TarArchive(const Path & path)
+    : archive{archive_read_new()}
+    , buffer(defaultBufferSize)
 {
-    this->archive = archive_read_new();
-
     archive_read_support_filter_all(archive);
     archive_read_support_format_all(archive);
     archive_read_set_option(archive, NULL, "mac-ext", NULL);
@@ -75,19 +108,19 @@ void TarArchive::close()
 
 TarArchive::~TarArchive()
 {
-    if (this->archive) archive_read_free(this->archive);
+    if (this->archive)
+        archive_read_free(this->archive);
 }
 
 static void extract_archive(TarArchive & archive, const Path & destDir)
 {
-    int flags = ARCHIVE_EXTRACT_TIME
-        | ARCHIVE_EXTRACT_SECURE_SYMLINKS
-        | ARCHIVE_EXTRACT_SECURE_NODOTDOT;
+    int flags = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_SECURE_SYMLINKS | ARCHIVE_EXTRACT_SECURE_NODOTDOT;
 
     for (;;) {
         struct archive_entry * entry;
         int r = archive_read_next_header(archive.archive, &entry);
-        if (r == ARCHIVE_EOF) break;
+        if (r == ARCHIVE_EOF)
+            break;
         auto name = archive_entry_pathname(entry);
         if (!name)
             throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
@@ -96,18 +129,16 @@ static void extract_archive(TarArchive & archive, const Path & destDir)
         else
             archive.check(r);
 
-        archive_entry_copy_pathname(entry,
-            (destDir + "/" + name).c_str());
+        archive_entry_copy_pathname(entry, (destDir + "/" + name).c_str());
 
         // sources can and do contain dirs with no rx bits
         if (archive_entry_filetype(entry) == AE_IFDIR && (archive_entry_mode(entry) & 0500) != 0500)
            archive_entry_set_mode(entry, archive_entry_mode(entry) | 0500);
 
         // Patch hardlink path
-        const char *original_hardlink = archive_entry_hardlink(entry);
+        const char * original_hardlink = archive_entry_hardlink(entry);
         if (original_hardlink) {
-            archive_entry_copy_hardlink(entry,
-                (destDir + "/" + original_hardlink).c_str());
+            archive_entry_copy_hardlink(entry, (destDir + "/" + original_hardlink).c_str());
         }
 
         archive.check(archive_read_extract(archive.archive, entry, flags));
@@ -140,7 +171,8 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
         // FIXME: merge with extract_archive
         struct archive_entry * entry;
         int r = archive_read_next_header(archive.archive, &entry);
-        if (r == ARCHIVE_EOF) break;
+        if (r == ARCHIVE_EOF)
+            break;
         auto path = archive_entry_pathname(entry);
         if (!path)
             throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
@@ -167,8 +199,9 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
                 auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                 if (n < 0)
                     throw Error("cannot read file '%s' from tarball", path);
-                if (n == 0) break;
-                crf(std::string_view {
+                if (n == 0)
+                    break;
+                crf(std::string_view{
                     (const char *) buf.data(),
                     (size_t) n,
                 });
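
To make the name-to-code kludge concrete, here is a hypothetical probe — not in the patch; the `ARCHIVE_FILTER_*` constants and `archive_read_support_filter_by_code` come from libarchive, and the asserted equalities reflect my reading of how `archive_filter_code` behaves on the scratch write handle:

```cpp
#include <archive.h>
#include <cassert>

#include "tarfile.hh"

void probeFilterCodes()
{
    // The scratch write archive should report the same codes that libarchive
    // exposes as ARCHIVE_FILTER_* constants...
    assert(nix::getArchiveFilterCodeByName("gzip") == ARCHIVE_FILTER_GZIP);
    assert(nix::getArchiveFilterCodeByName("zstd") == ARCHIVE_FILTER_ZSTD);

    // ...which is what makes them usable on the read side, where no by-name
    // lookup exists.
    struct archive * reader = archive_read_new();
    archive_read_support_format_raw(reader);
    archive_read_support_filter_by_code(reader, nix::getArchiveFilterCodeByName("xz"));
    archive_read_free(reader);
}
```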
diff --git a/src/libutil/tarfile.hh b/src/libutil/tarfile.hh
index 6a9c42149..705d211e4 100644
--- a/src/libutil/tarfile.hh
+++ b/src/libutil/tarfile.hh
@@ -7,25 +7,36 @@
 
 namespace nix {
 
-struct TarArchive {
+struct TarArchive
+{
     struct archive * archive;
     Source * source;
     std::vector<unsigned char> buffer;
 
     void check(int err, const std::string & reason = "failed to extract archive (%s)");
 
-    TarArchive(Source & source, bool raw = false);
+    explicit TarArchive(const Path & path);
 
-    TarArchive(const Path & path);
+    /// @brief Create a generic archive from a source.
+    /// @param source - Input byte stream.
+    /// @param raw - Whether to enable raw file support. For details, see the libarchive docs:
+    /// https://manpages.debian.org/stretch/libarchive-dev/archive_read_format.3.en.html
+    /// @param compression_method - Primary compression method to use. std::nullopt means 'all'.
+    TarArchive(Source & source, bool raw = false, std::optional<std::string> compression_method = std::nullopt);
 
-    /// disable copy constructor
+    /// Disable copying; explicitly default the move operations.
    TarArchive(const TarArchive &) = delete;
+    TarArchive & operator=(const TarArchive &) = delete;
+    TarArchive(TarArchive &&) = default;
+    TarArchive & operator=(TarArchive &&) = default;
 
     void close();
 
     ~TarArchive();
 };
 
+int getArchiveFilterCodeByName(const std::string & method);
+
 void unpackTarfile(Source & source, const Path & destDir);
 
 void unpackTarfile(const Path & tarFile, const Path & destDir);
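
A hedged sketch of the two construction paths the header now documents; `FdSource` comes from `serialise.hh`, and the function names are illustrative rather than part of the patch:

```cpp
#include "serialise.hh"
#include "tarfile.hh"

// Illustrative: unpack a (possibly compressed) tarball arriving on stdin,
// letting libarchive autodetect both the format and the compression.
void unpackStdinTo(const nix::Path & destDir)
{
    nix::FdSource src(0);
    nix::unpackTarfile(src, destDir);
}

// Illustrative: open a raw byte stream with the compression pinned to a
// single method, which is what ArchiveDecompressionSource now does.
void readPinned(nix::Source & src)
{
    nix::TarArchive ar{src, /*raw=*/true, std::string{"gzip"}};
    // ... iterate with archive_read_next_header() / archive_read_data()
    // on ar.archive ...
}
```

Copying is deleted because `TarArchive` owns a raw `struct archive *`.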
diff --git a/tests/nixos/default.nix b/tests/nixos/default.nix
index 98de31e13..627728424 100644
--- a/tests/nixos/default.nix
+++ b/tests/nixos/default.nix
@@ -158,4 +158,6 @@ in
   fetch-git = runNixOSTestFor "x86_64-linux" ./fetch-git;
 
   ca-fd-leak = runNixOSTestFor "x86_64-linux" ./ca-fd-leak;
+
+  gzip-content-encoding = runNixOSTestFor "x86_64-linux" ./gzip-content-encoding.nix;
 }
diff --git a/tests/nixos/gzip-content-encoding.nix b/tests/nixos/gzip-content-encoding.nix
new file mode 100644
index 000000000..a5a0033fd
--- /dev/null
+++ b/tests/nixos/gzip-content-encoding.nix
@@ -0,0 +1,71 @@
+# Test that compressed files fetched from a server that compresses its
+# responses do not get decompressed twice.
+# E.g. when fetching a zstd-compressed tarball from a server that compresses
+# the response with `Content-Encoding: gzip`, the expected result is that
+# the fetched file is still a zstd archive.
+
+{ lib, config, ... }:
+
+let
+  pkgs = config.nodes.machine.nixpkgs.pkgs;
+
+  zstdCompressedFile = pkgs.stdenv.mkDerivation {
+    name = "dummy-zstd-compressed-archive";
+    dontUnpack = true;
+    nativeBuildInputs = with pkgs; [ zstd ];
+    buildPhase = ''
+      mkdir archive
+      for _ in {1..100}; do echo "lorem" >> archive/file1; done
+      for _ in {1..100}; do echo "ipsum" >> archive/file2; done
+      tar --zstd -cf archive.tar.zst archive
+    '';
+    installPhase = ''
+      install -Dm 644 -T archive.tar.zst $out/share/archive
+    '';
+  };
+
+  fileCmd = "${pkgs.file}/bin/file";
+in
+
+{
+  name = "gzip-content-encoding";
+
+  nodes =
+    { machine =
+      { config, pkgs, ... }:
+      { networking.firewall.allowedTCPPorts = [ 80 ];
+
+        services.nginx.enable = true;
+        services.nginx.virtualHosts."localhost" =
+          { root = "${zstdCompressedFile}/share/";
+            # Make sure that nginx really tries to compress the
+            # file on the fly with no regard to size/mime.
+            # http://nginx.org/en/docs/http/ngx_http_gzip_module.html
+            extraConfig = ''
+              gzip on;
+              gzip_types *;
+              gzip_proxied any;
+              gzip_min_length 0;
+            '';
+          };
+        virtualisation.writableStore = true;
+        virtualisation.additionalPaths = with pkgs; [ file ];
+        nix.settings.substituters = lib.mkForce [ ];
+      };
+    };
+
+  # Check that when nix-prefetch-url fetches a zstd tarball, the tarball does not get decompressed.
+  testScript = { nodes }: ''
+    # fmt: off
+    start_all()
+
+    machine.wait_for_unit("nginx.service")
+    machine.succeed("""
+      # Make sure that the file is properly compressed as the test would be meaningless otherwise
+      curl --compressed -v http://localhost/archive |& tr -s ' ' |& grep --ignore-case 'content-encoding: gzip'
+      archive_path=$(nix-prefetch-url http://localhost/archive --print-path | tail -n1)
+      [[ $(${fileCmd} --brief --mime-type $archive_path) == "application/zstd" ]]
+      tar --zstd -xf $archive_path
+    """)
+  '';
+}