Skip to content

Commit

Permalink
Merge pull request #1534 from pyrmont/bugfix.windows-longstrings
Browse files: browse the repository at this point in the history
Support dedenting long-strings with Windows EOLs
  • Loading branch information
bakpakin authored Dec 20, 2024
2 parents 7f745a3 + 67e8518 commit b2d2690
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 22 deletions.
35 changes: 18 additions & 17 deletions src/core/parse.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -363,8 +363,7 @@ static int stringend(JanetParser *p, JanetParseState *state) {
JanetParseState top = p->states[p->statecount - 1];
int32_t indent_col = (int32_t) top.column - 1;
uint8_t *r = bufstart, *end = r + buflen;
/* Check if there are any characters before the start column -
* if so, do not reindent. */
/* Unless there are only spaces before EOLs, disable reindenting */
int reindent = 1;
while (reindent && (r < end)) {
if (*r++ == '\n') {
Expand All @@ -374,34 +373,36 @@ static int stringend(JanetParser *p, JanetParseState *state) {
break;
}
}
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') reindent = 1;
}
}
/* Now reindent if able to, otherwise just drop leading newline. */
if (!reindent) {
if (buflen > 0 && bufstart[0] == '\n') {
buflen--;
bufstart++;
}
} else {
/* Now reindent if able */
if (reindent) {
uint8_t *w = bufstart;
r = bufstart;
while (r < end) {
if (*r == '\n') {
if (r == bufstart) {
/* Skip leading newline */
r++;
} else {
*w++ = *r++;
}
*w++ = *r++;
for (int32_t j = 0; (r < end) && (*r != '\n') && (j < indent_col); j++, r++);
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') *w++ = *r++;
} else {
*w++ = *r++;
}
}
buflen = (int32_t)(w - bufstart);
}
/* Check for trailing newline character so we can remove it */
if (buflen > 0 && bufstart[buflen - 1] == '\n') {
/* Check for leading EOL so we can remove it */
if (buflen > 1 && bufstart[0] == '\r' && bufstart[1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
bufstart = bufstart + 2;
} else if (buflen > 0 && bufstart[0] == '\n') { /* Unix EOL */
buflen--;
bufstart++;
}
/* Check for trailing EOL so we can remove it */
if (buflen > 1 && bufstart[buflen - 2] == '\r' && bufstart[buflen - 1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
} else if (buflen > 0 && bufstart[buflen - 1] == '\n') { /* Unix EOL */
buflen--;
}
}
Expand Down
30 changes: 25 additions & 5 deletions test/suite-parse.janet
Original file line number | Diff line number | Diff line change
Expand Up @@ -57,19 +57,26 @@
(for i (+ index 1) (+ index indent 1)
(case (get text i)
nil (break)
(chr "\r") (if-not (= (chr "\n") (get text (inc i)))
(set rewrite false))
(chr "\n") (break)
(chr " ") nil
(set rewrite false))))

# Only re-indent if no dedented characters.
(def str
(if rewrite
(peg/replace-all ~(* "\n" (between 0 ,indent " ")) "\n" text)
(peg/replace-all ~(* '(* (? "\r") "\n") (between 0 ,indent " "))
(fn [mtch eol] eol) text)
text))

(def first-nl (= (chr "\n") (first str)))
(def last-nl (= (chr "\n") (last str)))
(string/slice str (if first-nl 1 0) (if last-nl -2)))
(def first-eol (cond
(string/has-prefix? "\r\n" str) :crlf
(string/has-prefix? "\n" str) :lf))
(def last-eol (cond
(string/has-suffix? "\r\n" str) :crlf
(string/has-suffix? "\n" str) :lf))
(string/slice str (case first-eol :crlf 2 :lf 1 0) (case last-eol :crlf -3 :lf -2)))

(defn reindent-reference
"Same as reindent but use parser functionality. Useful for
Expand All @@ -89,8 +96,10 @@
(let [a (reindent text indent)
b (reindent-reference text indent)]
(assert (= a b)
(string "indent " indent-counter " (indent=" indent ")"))))
(string/format "reindent: %q, parse: %q (indent-test #%d with indent of %d)" a b indent-counter indent)
)))

# Unix EOLs
(check-indent "" 0)
(check-indent "\n" 0)
(check-indent "\n" 1)
Expand All @@ -106,6 +115,17 @@
(check-indent "\n Hello, world!\n " 4)
(check-indent "\n Hello, world!\n dedented text\n " 4)
(check-indent "\n Hello, world!\n indented text\n " 4)
# Windows EOLs
(check-indent "\r\n" 0)
(check-indent "\r\n" 1)
(check-indent "\r\n\r\n" 0)
(check-indent "\r\n\r\n" 1)
(check-indent "\r\nHello, world!" 0)
(check-indent "\r\nHello, world!" 1)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n dedented text\r\n " 4)
(check-indent "\r\n Hello, world!\r\n indented text\r\n " 4)

# Symbols with @ character
# d68eae9
Expand Down

0 comments on commit b2d2690

Please sign in to comment.