From c173dcef61da864d80a8c7b9be6d072000a3f8b0 Mon Sep 17 00:00:00 2001 From: Alice Carroll Date: Thu, 8 Feb 2024 19:58:23 +0300 Subject: [PATCH] feat: tab support for indentation stripping --- src/libexpr/parser-state.hh | 48 +++++++++++-------- .../functional/lang/eval-okay-ind-string.exp | 2 +- .../functional/lang/eval-okay-ind-string.nix | 20 +++++++- 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser-state.hh index 5a928e9aa..6e70e1738 100644 --- a/src/libexpr/parser-state.hh +++ b/src/libexpr/parser-state.hh @@ -2,6 +2,7 @@ ///@file #include "eval.hh" +#include <limits> namespace nix { @@ -173,6 +174,15 @@ inline Formals * ParserState::validateFormals(Formals * formals, PosIdx pos, Sym return formals; } +enum IndentChar { + Tab = '\t', + Space = ' ', +}; + +constexpr bool isIndent(char c) { + return c == ' ' || c == '\t'; +} + inline Expr * ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, std::variant<Expr *, StringToken>>> && es) { @@ -180,36 +190,34 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos, /* Figure out the minimum indentation. Note that by design whitespace-only final lines are not taken into account. (So - the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */ + the " " in "\n ''" is ignored, but the " " in "\n foo''" is not.) */ + std::optional<IndentChar> indentChar = std::nullopt; bool atStartOfLine = true; /* = seen only whitespace in the current line */ - size_t minIndent = 1000000; + size_t minIndent = std::numeric_limits<size_t>::max(); size_t curIndent = 0; for (auto & [i_pos, i] : es) { auto * str = std::get_if<StringToken>(&i); if (!str || !str->hasIndentation) { /* Anti-quotations and escaped characters end the current start-of-line whitespace. 
*/ - if (atStartOfLine) { - atStartOfLine = false; - if (curIndent < minIndent) minIndent = curIndent; - } + if (atStartOfLine) minIndent = std::min(minIndent, curIndent); + atStartOfLine = false; continue; } for (size_t j = 0; j < str->l; ++j) { - if (atStartOfLine) { - if (str->p[j] == ' ') - curIndent++; - else if (str->p[j] == '\n') { - /* Empty line, doesn't influence minimum - indentation. */ - curIndent = 0; - } else { - atStartOfLine = false; - if (curIndent < minIndent) minIndent = curIndent; - } - } else if (str->p[j] == '\n') { - atStartOfLine = true; + auto cur = str->p[j]; + if (cur == '\n') { curIndent = 0; + atStartOfLine = true; + continue; } + if (!atStartOfLine) continue; + if (!isIndent(cur)) { + atStartOfLine = false; + minIndent = std::min(minIndent, curIndent); + continue; + } + if (!indentChar) indentChar = IndentChar(cur); + if (cur == indentChar) ++curIndent; } } @@ -228,7 +236,7 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos, std::string s2; for (size_t j = 0; j < t.l; ++j) { if (atStartOfLine) { - if (t.p[j] == ' ') { + if (t.p[j] == indentChar) { if (curDropped++ >= minIndent) s2 += t.p[j]; } diff --git a/tests/functional/lang/eval-okay-ind-string.exp b/tests/functional/lang/eval-okay-ind-string.exp index 7862331fa..ea60f0d1c 100644 --- a/tests/functional/lang/eval-okay-ind-string.exp +++ b/tests/functional/lang/eval-okay-ind-string.exp @@ -1 +1 @@ -"This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. 
Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', \${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: \${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\ncut -d $'\\t' -f 1\nending dollar $$\n" +"This is an indented multi-line string\nliteral. 
An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', \${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! 
cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: \${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\ncut -d $'\\t' -f 1\nending dollar $$\nThis text uses\n\ttabs\nfor indentation\nAnd this text uses\n spaces\nbut is indented with tabs\n \tThis text uses\n\t both spaces and tabs\n\t\tso nothing is stripped\n\t" diff --git a/tests/functional/lang/eval-okay-ind-string.nix b/tests/functional/lang/eval-okay-ind-string.nix index 95d59b508..b7c4df1d1 100644 --- a/tests/functional/lang/eval-okay-ind-string.nix +++ b/tests/functional/lang/eval-okay-ind-string.nix @@ -125,4 +125,22 @@ let # Accept dollars at end of strings s17 = ''ending dollar $'' + ''$'' + "\n"; -in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + s16 + s17 + s18 = '' + This text uses + tabs + for indentation + ''; + + s19 = '' + And this text uses + spaces + but is indented with tabs + ''; + + s20 = '' + This text uses + both spaces and tabs + so nothing is stripped + ''; + +in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + s16 + s17 + s18 + s19 + s20