Skip to content

Commit

Permalink
Support for parsing lexer commands (skip, channel, and so on)
Browse files Browse the repository at this point in the history
  • Loading branch information
KvanTTT committed Jul 27, 2024
1 parent fad3282 commit a9f716a
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 3 deletions.
6 changes: 6 additions & 0 deletions src/main/kotlin/AntlrTreeVisitor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ abstract class AntlrTreeVisitor<out R, in D> {

/** Visits a [RuleNode]; the default implementation forwards to [visitTreeNode]. */
open fun visitRuleNode(node: RuleNode, data: D) = visitTreeNode(node, data)

/** Visits a [CommandsNode]; the default implementation forwards to [visitTreeNode]. */
open fun visitCommandsNode(node: CommandsNode, data: D) = visitTreeNode(node, data)

/** Visits a [CommandsNode.CommaCommandNode]; the default implementation forwards to [visitTreeNode]. */
open fun visitCommaCommandNode(node: CommandsNode.CommaCommandNode, data: D) = visitTreeNode(node, data)

/** Visits a [CommandNode]; the default implementation forwards to [visitTreeNode]. */
open fun visitCommandNode(node: CommandNode, data: D) = visitTreeNode(node, data)

/** Visits a [ModeNode]; the default implementation forwards to [visitTreeNode]. */
open fun visitModeNode(node: ModeNode, data: D) = visitTreeNode(node, data)

/** Visits a [ModeNode.ModeDeclaration]; the default implementation forwards to [visitTreeNode]. */
open fun visitModeDeclaration(node: ModeNode.ModeDeclaration, data: D) = visitTreeNode(node, data)
Expand Down
5 changes: 4 additions & 1 deletion src/main/kotlin/parser/AntlrLexer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class AntlrLexer(
'\u0300'..'\u036F',
'\u203F'..'\u2040',
)
private val hexDigits = charSetOf('0'..'9', 'a'..'f', 'A'..'F')
// Decimal digit characters '0'..'9'.
private val digits = charSetOf('0'..'9')
// Hexadecimal digit characters: the decimal digits plus 'a'..'f' and 'A'..'F'.
private val hexDigits = digits + charSetOf('a'..'f', 'A'..'F')

// Keyword descriptors pairing a token type with its literal text.
private val fragmentKeyword = Keyword(AntlrTokenType.Fragment, "fragment")
private val grammarKeyword = Keyword(AntlrTokenType.Grammar, "grammar")
Expand Down Expand Up @@ -166,6 +167,8 @@ class AntlrLexer(
idContinueChars
)

in digits -> tokenizeSequence(AntlrTokenType.Digit, digits)

else -> tokenizeSingleChar(AntlrTokenType.Error, AntlrTokenChannel.Error)
}
}
Expand Down
53 changes: 51 additions & 2 deletions src/main/kotlin/parser/AntlrParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class AntlrParser(
}

// rule
// : 'fragment'? id ':' block ';'
// : 'fragment'? id ':' block commands? ';'
// ;
fun parseRule(): RuleNode {
val fragmentToken = if (getToken().type == AntlrTokenType.Fragment) matchToken() else null
Expand All @@ -108,17 +108,66 @@ class AntlrParser(

val blockNode = parseBlock(colonToken.end())

val commandsNode = if (getToken().type == AntlrTokenType.RightArrow) {
parseCommands()
} else {
null
}

val semicolonToken = matchToken(AntlrTokenType.Semicolon)

return RuleNode(
fragmentToken,
lexerIdOrParserIdToken,
colonToken,
blockNode,
semicolonToken
commandsNode,
semicolonToken,
)
}

// commands
// : '->' command (',' command)*
// ;
//
// Parses the commands section of a rule: the '->' token, one mandatory command and
// any number of further commands, each preceded by a comma.
// Returns a CommandsNode holding the arrow token, the first command and the
// (possibly empty) list of comma/command pairs in source order.
private fun parseCommands(): CommandsNode {
    // The section is introduced by '->', i.e. the RightArrow token.
    val arrowToken = matchToken(AntlrTokenType.RightArrow)

    val commandNode = parseCommand()

    val commaCommandNodes = buildList {
        // Keep consuming ", command" pairs for as long as the next token is a comma.
        while (getToken().type == AntlrTokenType.Comma) {
            val commaToken = matchToken()
            add(CommandsNode.CommaCommandNode(commaToken, parseCommand()))
        }
    }

    return CommandsNode(arrowToken, commandNode, commaCommandNodes)
}

// command
// : id ('(' (id | digit) ')')?
// ;
//
// Parses a single command: its name and, when a '(' follows, one parenthesized
// parameter that is either an identifier or a Digit token.
private fun parseCommand(): CommandNode {
    val commandName = parseId()

    // No opening parenthesis means the command takes no parameter.
    if (getToken().type != AntlrTokenType.LeftParen) {
        return CommandNode(commandName, null)
    }

    val openParen = matchToken()
    val argument = when (getToken().type) {
        AntlrTokenType.Digit -> matchToken()
        AntlrTokenType.LexerId, AntlrTokenType.ParserId -> parseId()
        // Anything else: the parameter is reported as missing.
        else -> emitMissingToken(tokenType = null)
    }
    val closeParen = matchToken(AntlrTokenType.RightParen)

    return CommandNode(commandName, CommandNode.Params(openParen, argument, closeParen))
}

// block
// : alternative ('|' alternative)*
// ;
Expand Down
1 change: 1 addition & 0 deletions src/main/kotlin/parser/AntlrToken.kt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ enum class AntlrTokenType {
Char,
EscapedChar,
UnicodeEscapedChar,
Digit,
Empty, // Special token type needed for preserving location of empty alternatives
}

Expand Down
60 changes: 60 additions & 0 deletions src/main/kotlin/parser/AntlrTreeNodes.kt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class RuleNode(
val idToken: AntlrToken,
val colonToken: AntlrToken,
val blockNode: BlockNode,
val commandsNode: CommandsNode?,
val semicolonToken: AntlrToken,
) : AntlrTreeNode() {
override fun calculateLeftToken(): AntlrToken = idToken
Expand All @@ -58,11 +59,70 @@ class RuleNode(
visitor.visitToken(idToken, data)
visitor.visitToken(colonToken, data)
visitor.visitBlockNode(blockNode, data)
commandsNode?.let { visitor.visitCommandsNode(it, data) }
visitor.visitToken(semicolonToken, data)
return null
}
}

/**
 * Tree node for the commands section of a rule: an arrow token, a mandatory first
 * command and zero or more further commands, each introduced by a comma.
 *
 * @property arrowToken the token that starts the commands section
 * @property commandNode the first (mandatory) command
 * @property commaCommandNodes the remaining comma/command pairs, in source order
 */
class CommandsNode(
    val arrowToken: AntlrToken,
    val commandNode: CommandNode,
    val commaCommandNodes: List<CommaCommandNode>,
) : AntlrTreeNode() {
    /** A comma token together with the command that follows it. */
    class CommaCommandNode(val comma: AntlrToken, val command: CommandNode) : AntlrTreeNode() {
        override fun calculateLeftToken(): AntlrToken = comma

        override fun calculateRightToken(): AntlrToken = command.rightToken

        override fun <R, D> acceptChildren(visitor: AntlrTreeVisitor<R, D>, data: D): R? {
            visitor.visitToken(comma, data)
            visitor.visitCommandNode(command, data)
            return null
        }
    }

    override fun calculateLeftToken(): AntlrToken = arrowToken

    // The node ends at the last comma-separated command, or at the first command
    // when there are no further ones.
    override fun calculateRightToken(): AntlrToken =
        commaCommandNodes.lastOrNull()?.rightToken ?: commandNode.rightToken

    override fun <R, D> acceptChildren(visitor: AntlrTreeVisitor<R, D>, data: D): R? {
        visitor.visitToken(arrowToken, data)
        // Use the typed visit method so that visitors overriding visitCommandNode
        // also see the first command, exactly like the comma-separated ones.
        visitor.visitCommandNode(commandNode, data)
        commaCommandNodes.forEach { visitor.visitCommaCommandNode(it, data) }
        return null
    }
}

/**
 * Tree node for a single command: a name token optionally followed by a
 * parenthesized parameter.
 *
 * @property nameToken the command name
 * @property paramsNode the parenthesized parameter, or `null` when the command has none
 */
class CommandNode(
    val nameToken: AntlrToken,
    val paramsNode: Params?,
) : AntlrTreeNode() {
    override fun calculateLeftToken(): AntlrToken = nameToken

    override fun calculateRightToken(): AntlrToken {
        // Without parameters the command consists of the name token only.
        val params = paramsNode ?: return nameToken
        return params.rightParenToken
    }

    override fun <R, D> acceptChildren(visitor: AntlrTreeVisitor<R, D>, data: D): R? {
        visitor.visitToken(nameToken, data)
        val params = paramsNode
        if (params != null) {
            visitor.visitTreeNode(params, data)
        }
        return null
    }

    /** The parenthesized parameter of a command: `(`, the parameter token, `)`. */
    class Params(
        val leftParenToken: AntlrToken,
        val paramToken: AntlrToken,
        val rightParenToken: AntlrToken,
    ) : AntlrTreeNode() {
        override fun calculateLeftToken(): AntlrToken = leftParenToken

        override fun calculateRightToken(): AntlrToken = rightParenToken

        override fun <R, D> acceptChildren(visitor: AntlrTreeVisitor<R, D>, data: D): R? {
            // Tokens are visited in source order.
            for (token in listOf(leftParenToken, paramToken, rightParenToken)) {
                visitor.visitToken(token, data)
            }
            return null
        }
    }
}


class ModeNode(val modeDeclaration: ModeDeclaration?, val ruleNodes: List<RuleNode>) : AntlrTreeNode() {
class ModeDeclaration(val modeToken: AntlrToken, val idToken: AntlrToken, val semicolonToken: AntlrToken) : AntlrTreeNode() {
override fun calculateLeftToken(): AntlrToken = modeToken
Expand Down
47 changes: 47 additions & 0 deletions src/test/kotlin/parser/AntlrParserWithLexerTests.kt
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,51 @@ object AntlrParserWithLexerTests {
"~A"
) { it.parseElement() }
}

// Checks that a lexer rule followed by two commands (`skip` and `pushMode(DEFAULT_MODE)`)
// is parsed into the expected RuleNode tree.
@Test
fun commands() {
    val ruleName = AntlrToken(AntlrTokenType.LexerId, value = "A")
    val colon = AntlrToken(AntlrTokenType.Colon)

    // 'A'
    val literal = ElementNode.StringLiteralOrRange.StringLiteral(
        AntlrToken(AntlrTokenType.Quote),
        listOf(AntlrToken(AntlrTokenType.Char, value = "A")),
        AntlrToken(AntlrTokenType.Quote)
    )
    val element = ElementNode.StringLiteralOrRange(
        tilde = null,
        literal,
        range = null,
        elementSuffix = null
    )
    val block = BlockNode(
        alternativeNode = AlternativeNode(listOf(element)),
        orAlternativeNodes = emptyList(),
    )

    // -> skip, pushMode(DEFAULT_MODE)
    val arrow = AntlrToken(AntlrTokenType.RightArrow)
    val skipCommand = CommandNode(
        AntlrToken(AntlrTokenType.ParserId, value = "skip"),
        paramsNode = null
    )
    val comma = AntlrToken(AntlrTokenType.Comma)
    val pushModeCommand = CommandNode(
        AntlrToken(AntlrTokenType.ParserId, value = "pushMode"),
        paramsNode = CommandNode.Params(
            AntlrToken(AntlrTokenType.LeftParen),
            AntlrToken(AntlrTokenType.LexerId, value = "DEFAULT_MODE"),
            AntlrToken(AntlrTokenType.RightParen)
        )
    )
    val commands = CommandsNode(
        arrow,
        commandNode = skipCommand,
        commaCommandNodes = listOf(CommandsNode.CommaCommandNode(comma, command = pushModeCommand)),
    )

    val expected = RuleNode(
        fragmentToken = null,
        ruleName,
        colon,
        block,
        commands,
        AntlrToken(AntlrTokenType.Semicolon),
    )

    infrastructure.check(expected, "A: 'A' -> skip, pushMode(DEFAULT_MODE);") { parser ->
        parser.parseRule()
    }
}
}
2 changes: 2 additions & 0 deletions src/test/kotlin/parser/ExampleData.kt
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ x
),
),

commandsNode = null,

AntlrToken(AntlrTokenType.Semicolon),
),
)
Expand Down

0 comments on commit a9f716a

Please sign in to comment.