From 83b135699fc9391c940648804fed12384fd14f3d Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 27 Dec 2024 10:04:48 +0000 Subject: [PATCH] Allow using lookahead operator with captures. This is a fix for #518. --- src/regexp/ast_to_re.cc | 14 +- test/tags/captvars_with_lookahead.c | 246 +++++++++++++++++++++++++++ test/tags/captvars_with_lookahead.re | 46 +++++ test/tags/error_2.c | 2 +- test/tags/error_2.re | 2 +- 5 files changed, 302 insertions(+), 8 deletions(-) create mode 100644 test/tags/captvars_with_lookahead.c create mode 100644 test/tags/captvars_with_lookahead.re diff --git a/src/regexp/ast_to_re.cc b/src/regexp/ast_to_re.cc index 479e5daad..2463cac2b 100644 --- a/src/regexp/ast_to_re.cc +++ b/src/regexp/ast_to_re.cc @@ -423,12 +423,14 @@ LOCAL_NODISCARD(Ret ast_to_re(RESpec& spec, break; case AstKind::TAG: - if (ast->tag.name && !opts->tags) { - RET_FAIL(spec.msg.error( - ast->loc, "tags are only allowed with '-T, --tags' option")); - } else if (opts->captures) { - RET_FAIL(spec.msg.error( - ast->loc, "cannot mix capturing groups and standalone tags")); + if (ast->tag.name) { // skip lookahead operator / (unnamed tag), it's a special case + if (!opts->tags) { + RET_FAIL(spec.msg.error( + ast->loc, "tags are only allowed with '-T, --tags' option")); + } else if (opts->captures) { + RET_FAIL(spec.msg.error( + ast->loc, "cannot mix capturing groups and standalone tags")); + } } re = re_tag(spec, tags.size(), false); tags.emplace_back(ast->tag.name, ast->tag.history, x.height); diff --git a/test/tags/captvars_with_lookahead.c b/test/tags/captvars_with_lookahead.c new file mode 100644 index 000000000..34a2eca43 --- /dev/null +++ b/test/tags/captvars_with_lookahead.c @@ -0,0 +1,246 @@ +/* Generated by re2c */ +#line 1 "tags/captvars_with_lookahead.re" +// re2c $INPUT -o $OUTPUT +#include <assert.h> +#include <stddef.h> + +typedef struct { int major, minor, patch; } SemVer; + +static int s2n(const char *s, const char *e) { // pre-parsed string to number + int n = 0; + for 
(; s < e; ++s) n = n * 10 + (*s - '0'); + return n; +} + +static int lex(const char *str, SemVer *ver) { + const char *YYCURSOR = str, *YYMARKER; + + // Final tag variables available in semantic action. + +#line 21 "tags/captvars_with_lookahead.c" +const char *yytl0; +const char *yytl1; +const char *yytl2; +const char *yytl3; +const char *yytr0; +const char *yytr1; +const char *yytr2; +const char *yytr3; +#line 17 "tags/captvars_with_lookahead.re" + + + // Intermediate tag variables used by the lexer (must be autogenerated). + +#line 35 "tags/captvars_with_lookahead.c" +const char *yyt1; +const char *yyt2; +const char *yyt3; +const char *yyt4; +const char *yyt5; +const char *yyt6; +#line 20 "tags/captvars_with_lookahead.re" + + + +#line 46 "tags/captvars_with_lookahead.c" +{ + char yych; + unsigned int yyaccept = 0; + yych = *YYCURSOR; + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + yyt1 = YYCURSOR; + goto yy3; + default: goto yy1; + } +yy1: + ++YYCURSOR; +yy2: +#line 36 "tags/captvars_with_lookahead.re" + { return 1; } +#line 71 "tags/captvars_with_lookahead.c" +yy3: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '.': goto yy4; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy6; + default: goto yy2; + } +yy4: + yych = *++YYCURSOR; + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + yyt2 = YYCURSOR; + goto yy7; + default: goto yy5; + } +yy5: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy2; + } else { + yyt3 = YYCURSOR; + yyt4 = NULL; + yyt5 = YYCURSOR; + yyt6 = NULL; + goto yy8; + } +yy6: + yych = *++YYCURSOR; + switch (yych) { + case '.': goto yy4; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto 
yy6; + default: goto yy5; + } +yy7: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '.': + yyt3 = YYCURSOR; + yyt6 = YYCURSOR; + goto yy9; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy7; + case 'L': + case 'U': + case 'l': + case 'u': + yyt3 = YYCURSOR; + yyt4 = NULL; + yyt5 = YYCURSOR; + yyt6 = NULL; + goto yy10; + default: + yyt3 = YYCURSOR; + yyt4 = NULL; + yyt5 = YYCURSOR; + yyt6 = NULL; + goto yy8; + } +yy8: + yytl1 = yyt1; + yytl2 = yyt2; + yytr2 = yyt3; + yytl3 = yyt6; + yytr3 = yyt4; + yytl0 = yyt1; + yytr0 = yyt5; + yytr1 = yyt2; + yytr1 -= 1; + YYCURSOR = yyt5; +#line 29 "tags/captvars_with_lookahead.re" + { + (void) yytl0; (void) yytr0; // some variables are unused + ver->major = s2n(yytl1, yytr1); + ver->minor = s2n(yytl2, yytr2); + ver->patch = yytl3 ? s2n(yytl3 + 1, yytr3) : 0; + return 0; + } +#line 186 "tags/captvars_with_lookahead.c" +yy9: + yych = *++YYCURSOR; + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy11; + default: goto yy5; + } +yy10: + yych = *++YYCURSOR; + switch (yych) { + case 'L': + case 'U': + case 'l': + case 'u': goto yy10; + default: goto yy8; + } +yy11: + yych = *++YYCURSOR; + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy11; + case 'L': + case 'U': + case 'l': + case 'u': + yyt4 = YYCURSOR; + yyt5 = YYCURSOR; + goto yy10; + default: + yyt4 = YYCURSOR; + yyt5 = YYCURSOR; + goto yy8; + } +} +#line 37 "tags/captvars_with_lookahead.re" + +} + +int main() { + SemVer v; + assert(lex("23.34ull", &v) == 0 && v.major == 23 && v.minor == 34 && v.patch == 0); + assert(lex("1.2.999", &v) == 0 && v.major == 1 && v.minor == 2 && v.patch == 999); + assert(lex("1.a", &v) == 1); + return 0; +} \ No newline at end of file diff --git 
a/test/tags/captvars_with_lookahead.re b/test/tags/captvars_with_lookahead.re new file mode 100644 index 000000000..f4499209c --- /dev/null +++ b/test/tags/captvars_with_lookahead.re @@ -0,0 +1,46 @@ +// re2c $INPUT -o $OUTPUT +#include <assert.h> +#include <stddef.h> + +typedef struct { int major, minor, patch; } SemVer; + +static int s2n(const char *s, const char *e) { // pre-parsed string to number + int n = 0; + for (; s < e; ++s) n = n * 10 + (*s - '0'); + return n; +} + +static int lex(const char *str, SemVer *ver) { + const char *YYCURSOR = str, *YYMARKER; + + // Final tag variables available in semantic action. + /*!svars:re2c format = 'const char *@@;\n'; */ + + // Intermediate tag variables used by the lexer (must be autogenerated). + /*!stags:re2c format = 'const char *@@;\n'; */ + + /*!re2c + re2c:yyfill:enable = 0; + re2c:define:YYCTYPE = char; + re2c:captvars = 1; + + num = [0-9]+; + + (num) "." (num) ("." num)? / [uUlL]* { + (void) yytl0; (void) yytr0; // some variables are unused + ver->major = s2n(yytl1, yytr1); + ver->minor = s2n(yytl2, yytr2); + ver->patch = yytl3 ? 
s2n(yytl3 + 1, yytr3) : 0; + return 0; + } + * { return 1; } + */ +} + +int main() { + SemVer v; + assert(lex("23.34ull", &v) == 0 && v.major == 23 && v.minor == 34 && v.patch == 0); + assert(lex("1.2.999", &v) == 0 && v.major == 1 && v.minor == 2 && v.patch == 999); + assert(lex("1.a", &v) == 1); + return 0; +} \ No newline at end of file diff --git a/test/tags/error_2.c b/test/tags/error_2.c index aac4a4476..270b268eb 100644 --- a/test/tags/error_2.c +++ b/test/tags/error_2.c @@ -1 +1 @@ -tags/error_2.re:3:18: error: cannot mix capturing groups and standalone tags +tags/error_2.re:4:11: error: cannot mix capturing groups and standalone tags diff --git a/test/tags/error_2.re b/test/tags/error_2.re index b69ebdafd..78212154a 100644 --- a/test/tags/error_2.re +++ b/test/tags/error_2.re @@ -1,5 +1,5 @@ // re2c $INPUT -o $OUTPUT --captures /*!re2c - ("c"*) / "a"* {} // error, lookahead tag mixed with captures + ("c"*) / "a"* {} // ok, lookahead tag is a special case ("c"*) @t "b"* {} // error, standalone tag mixed with captures */