Skip to content

Commit

Permalink
Allow using lookahead operator with captures.
Browse files Browse the repository at this point in the history
This is a fix for #518.
  • Loading branch information
skvadrik committed Dec 27, 2024
1 parent 2ecbbce commit 83b1356
Show file tree
Hide file tree
Showing 5 changed files with 302 additions and 8 deletions.
14 changes: 8 additions & 6 deletions src/regexp/ast_to_re.cc
Original file line number Diff line number Diff line change
Expand Up @@ -423,12 +423,14 @@ LOCAL_NODISCARD(Ret ast_to_re(RESpec& spec,
break;

case AstKind::TAG:
if (ast->tag.name && !opts->tags) {
RET_FAIL(spec.msg.error(
ast->loc, "tags are only allowed with '-T, --tags' option"));
} else if (opts->captures) {
RET_FAIL(spec.msg.error(
ast->loc, "cannot mix capturing groups and standalone tags"));
if (ast->tag.name) { // skip lookahead operator / (unnamed tag), it's a special case
if (!opts->tags) {
RET_FAIL(spec.msg.error(
ast->loc, "tags are only allowed with '-T, --tags' option"));
} else if (opts->captures) {
RET_FAIL(spec.msg.error(
ast->loc, "cannot mix capturing groups and standalone tags"));
}
}
re = re_tag(spec, tags.size(), false);
tags.emplace_back(ast->tag.name, ast->tag.history, x.height);
Expand Down
246 changes: 246 additions & 0 deletions test/tags/captvars_with_lookahead.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/* Generated by re2c */
#line 1 "tags/captvars_with_lookahead.re"
// re2c $INPUT -o $OUTPUT
#include <assert.h>
#include <stddef.h>

typedef struct { int major, minor, patch; } SemVer; // version numbers filled in by lex(); patch is set to 0 when the input has no ".PATCH" part

static int s2n(const char *s, const char *e) { // decimal value of the digits in [s, e); input is already validated by the lexer
    int value = 0;
    while (s < e) value = value * 10 + (*s++ - '0');
    return value;
}

static int lex(const char *str, SemVer *ver) { // match "MAJOR.MINOR[.PATCH]" at the start of str: fills *ver and returns 0 on a match, returns 1 otherwise
const char *YYCURSOR = str, *YYMARKER; // YYCURSOR: current input position; YYMARKER: position saved for backtracking (restored at yy5)

// Final tag variables available in semantic action: yytlN/yytrN are read there as the start/end of each captured piece.

#line 21 "tags/captvars_with_lookahead.c"
const char *yytl0; // copy of yyt1 (same start as MAJOR); unused by the action
const char *yytl1; // start of the MAJOR digits
const char *yytl2; // start of the MINOR digits
const char *yytl3; // the '.' that starts ".PATCH", or NULL when that part is absent
const char *yytr0; // copy of yyt5 (the position YYCURSOR is rewound to); unused by the action
const char *yytr1; // end of the MAJOR digits
const char *yytr2; // end of the MINOR digits
const char *yytr3; // end of the PATCH digits, or NULL when that part is absent
#line 17 "tags/captvars_with_lookahead.re"


// Intermediate tag variables used by the lexer (must be autogenerated); they are copied into the final variables at yy8.

#line 35 "tags/captvars_with_lookahead.c"
const char *yyt1; // first MAJOR digit
const char *yyt2; // first MINOR digit
const char *yyt3; // position just past the MINOR digits
const char *yyt4; // position just past the PATCH digits, or NULL
const char *yyt5; // position where the version proper ends (before any L/U/l/u letters)
const char *yyt6; // the '.' before PATCH, or NULL
#line 20 "tags/captvars_with_lookahead.re"



#line 46 "tags/captvars_with_lookahead.c"
{
char yych; // character currently being examined
unsigned int yyaccept = 0; // selects the fallback taken at yy5: 0 = default rule, 1 = accept MAJOR.MINOR without PATCH
yych = *YYCURSOR;
switch (yych) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
yyt1 = YYCURSOR; // remember where MAJOR starts
goto yy3;
default: goto yy1;
}
yy1: // first character is not a digit: consume it and fall into the default rule
++YYCURSOR;
yy2: // default rule: no version matched
#line 36 "tags/captvars_with_lookahead.re"
{ return 1; }
#line 71 "tags/captvars_with_lookahead.c"
yy3: // one MAJOR digit consumed
yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR); // mark this position so yy5 can return to it
switch (yych) {
case '.': goto yy4;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy6;
default: goto yy2;
}
yy4: // '.' after MAJOR consumed; a digit must follow
yych = *++YYCURSOR;
switch (yych) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
yyt2 = YYCURSOR; // remember where MINOR starts
goto yy7;
default: goto yy5;
}
yy5: // dead end: go back to the last marked position and take the fallback chosen by yyaccept
YYCURSOR = YYMARKER;
if (yyaccept == 0) { // nothing acceptable was seen yet
goto yy2;
} else { // MAJOR.MINOR was already seen (marked in yy7): accept it with no PATCH
yyt3 = YYCURSOR;
yyt4 = NULL;
yyt5 = YYCURSOR;
yyt6 = NULL;
goto yy8;
}
yy6: // further MAJOR digits
yych = *++YYCURSOR;
switch (yych) {
case '.': goto yy4;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy6;
default: goto yy5;
}
yy7: // inside the MINOR digits; from here on MAJOR.MINOR is an acceptable match
yyaccept = 1;
yych = *(YYMARKER = ++YYCURSOR); // mark the end of the digits seen so far
switch (yych) {
case '.': // possible start of ".PATCH": record end of MINOR and the position of the '.'
yyt3 = YYCURSOR;
yyt6 = YYCURSOR;
goto yy9;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy7;
case 'L': // suffix letter: MINOR ends here and there is no PATCH
case 'U':
case 'l':
case 'u':
yyt3 = YYCURSOR;
yyt4 = NULL;
yyt5 = YYCURSOR;
yyt6 = NULL;
goto yy10;
default: // any other character: MINOR ends here and there is no PATCH
yyt3 = YYCURSOR;
yyt4 = NULL;
yyt5 = YYCURSOR;
yyt6 = NULL;
goto yy8;
}
yy8: // accept: publish the tags, rewind the cursor, run the semantic action
yytl1 = yyt1;
yytl2 = yyt2;
yytr2 = yyt3;
yytl3 = yyt6;
yytr3 = yyt4;
yytl0 = yyt1;
yytr0 = yyt5;
yytr1 = yyt2;
yytr1 -= 1; // MAJOR ends one character before MINOR starts, i.e. at the '.' between them
YYCURSOR = yyt5; // rewind so that trailing L/U/l/u letters skipped in yy10 are not consumed
#line 29 "tags/captvars_with_lookahead.re"
{
(void) yytl0; (void) yytr0; // some variables are unused
ver->major = s2n(yytl1, yytr1);
ver->minor = s2n(yytl2, yytr2);
ver->patch = yytl3 ? s2n(yytl3 + 1, yytr3) : 0; // yytl3 points at the '.', so the digits start one character later
return 0;
}
#line 186 "tags/captvars_with_lookahead.c"
yy9: // '.' after MINOR consumed; a digit must follow, otherwise yy5 falls back to MAJOR.MINOR
yych = *++YYCURSOR;
switch (yych) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy11;
default: goto yy5;
}
yy10: // skip any further L/U/l/u letters, then accept
yych = *++YYCURSOR;
switch (yych) {
case 'L':
case 'U':
case 'l':
case 'u': goto yy10;
default: goto yy8;
}
yy11: // inside the PATCH digits
yych = *++YYCURSOR;
switch (yych) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy11;
case 'L': // suffix letter: PATCH (and the version proper) ends here
case 'U':
case 'l':
case 'u':
yyt4 = YYCURSOR;
yyt5 = YYCURSOR;
goto yy10;
default: // any other character: PATCH (and the version proper) ends here
yyt4 = YYCURSOR;
yyt5 = YYCURSOR;
goto yy8;
}
}
#line 37 "tags/captvars_with_lookahead.re"

}

int main() { // self-check of lex(): each assert aborts the program if an expectation fails
SemVer v;
assert(lex("23.34ull", &v) == 0 && v.major == 23 && v.minor == 34 && v.patch == 0); // trailing "ull" letters are allowed; missing patch is reported as 0
assert(lex("1.2.999", &v) == 0 && v.major == 1 && v.minor == 2 && v.patch == 999); // all three components present
assert(lex("1.a", &v) == 1); // no digits after the '.', so the default rule returns 1
return 0;
}
46 changes: 46 additions & 0 deletions test/tags/captvars_with_lookahead.re
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// re2c $INPUT -o $OUTPUT
#include <assert.h>
#include <stddef.h>

typedef struct { int major, minor, patch; } SemVer; // version numbers filled in by lex(); patch is set to 0 when the optional third group did not match

static int s2n(const char *s, const char *e) { // decimal value of the digits in [s, e); input is already validated by the lexer
    int value = 0;
    while (s < e) value = value * 10 + (*s++ - '0');
    return value;
}

static int lex(const char *str, SemVer *ver) { // match a version at the start of str: the rule's action fills *ver and returns 0, the default rule returns 1
const char *YYCURSOR = str, *YYMARKER; // input position variables used by the generated lexer; YYMARKER is left for the lexer to set

// Final tag variables available in semantic action (the yytlN/yytrN pointers read in the rule below).
/*!svars:re2c format = 'const char *@@;\n'; */

// Intermediate tag variables used by the lexer (must be autogenerated).
/*!stags:re2c format = 'const char *@@;\n'; */
// Rule below: groups 1 and 2 are digit runs separated by ".", group 3 is an optional "." plus digits; the part after the lookahead operator `/` allows u/U/l/L letters.
/*!re2c
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
re2c:captvars = 1;
num = [0-9]+;
(num) "." (num) ("." num)? / [uUlL]* {
(void) yytl0; (void) yytr0; // some variables are unused
ver->major = s2n(yytl1, yytr1);
ver->minor = s2n(yytl2, yytr2);
ver->patch = yytl3 ? s2n(yytl3 + 1, yytr3) : 0;
return 0;
}
* { return 1; }
*/
}

int main() { // self-check of lex(): each assert aborts the program if an expectation fails
SemVer v;
assert(lex("23.34ull", &v) == 0 && v.major == 23 && v.minor == 34 && v.patch == 0); // trailing "ull" letters are allowed; missing patch is reported as 0
assert(lex("1.2.999", &v) == 0 && v.major == 1 && v.minor == 2 && v.patch == 999); // all three components present
assert(lex("1.a", &v) == 1); // no digits after the ".", so the default rule returns 1
return 0;
}
2 changes: 1 addition & 1 deletion test/tags/error_2.c
Original file line number Diff line number Diff line change
@@ -1 +1 @@
tags/error_2.re:3:18: error: cannot mix capturing groups and standalone tags
tags/error_2.re:4:11: error: cannot mix capturing groups and standalone tags
2 changes: 1 addition & 1 deletion test/tags/error_2.re
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// re2c $INPUT -o $OUTPUT --captures
/*!re2c
("c"*) / "a"* {} // error, lookahead tag mixed with captures
("c"*) / "a"* {} // ok, lookahead tag is a special case
("c"*) @t "b"* {} // error, standalone tag mixed with captures
*/

0 comments on commit 83b1356

Please sign in to comment.