From a9f716a1dfb51eac8fdf6543edbfb0a82540b3a7 Mon Sep 17 00:00:00 2001
From: Ivan Kochurkin
Date: Sat, 27 Jul 2024 15:16:41 +0200
Subject: [PATCH] Support for parsing lexer commands (skip, channel, and so on) #6

---
Review notes (text here is ignored by git-am; not part of the commit message):
 - CommandsNode.acceptChildren now dispatches the first command through
   visitCommandNode, as CommaCommandNode already does for the following ones.
 - parseCommands: local renamed to arrowToken (it holds a RightArrow token).
 - AntlrTreeNodes.kt: one-line KDoc on the new nodes; the stray second blank
   line before ModeNode is dropped. Hunk and diffstat counts are updated
   accordingly; the `index` blob ids are still those of the original commit.

 src/main/kotlin/AntlrTreeVisitor.kt           |  6 ++
 src/main/kotlin/parser/AntlrLexer.kt          |  5 +-
 src/main/kotlin/parser/AntlrParser.kt         | 53 +++++++++++++++-
 src/main/kotlin/parser/AntlrToken.kt          |  1 +
 src/main/kotlin/parser/AntlrTreeNodes.kt      | 64 +++++++++++++++++++
 .../parser/AntlrParserWithLexerTests.kt       | 47 ++++++++++++++
 src/test/kotlin/parser/ExampleData.kt         |  2 +
 7 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/src/main/kotlin/AntlrTreeVisitor.kt b/src/main/kotlin/AntlrTreeVisitor.kt
index a2516d1..433fea6 100644
--- a/src/main/kotlin/AntlrTreeVisitor.kt
+++ b/src/main/kotlin/AntlrTreeVisitor.kt
@@ -9,6 +9,12 @@ abstract class AntlrTreeVisitor {
 
     open fun visitRuleNode(node: RuleNode, data: D) = visitTreeNode(node, data)
 
+    open fun visitCommandsNode(node: CommandsNode, data: D) = visitTreeNode(node, data)
+
+    open fun visitCommaCommandNode(node: CommandsNode.CommaCommandNode, data: D) = visitTreeNode(node, data)
+
+    open fun visitCommandNode(node: CommandNode, data: D) = visitTreeNode(node, data)
+
     open fun visitModeNode(node: ModeNode, data: D) = visitTreeNode(node, data)
 
     open fun visitModeDeclaration(node: ModeNode.ModeDeclaration, data: D) = visitTreeNode(node, data)
diff --git a/src/main/kotlin/parser/AntlrLexer.kt b/src/main/kotlin/parser/AntlrLexer.kt
index 1de5e56..e304e7c 100644
--- a/src/main/kotlin/parser/AntlrLexer.kt
+++ b/src/main/kotlin/parser/AntlrLexer.kt
@@ -36,7 +36,8 @@ class AntlrLexer(
             '\u0300'..'\u036F',
             '\u203F'..'\u2040',
         )
-        private val hexDigits = charSetOf('0'..'9', 'a'..'f', 'A'..'F')
+        private val digits = charSetOf('0'..'9')
+        private val hexDigits = digits + charSetOf('a'..'f', 'A'..'F')
 
         private val fragmentKeyword = Keyword(AntlrTokenType.Fragment, "fragment")
         private val grammarKeyword = Keyword(AntlrTokenType.Grammar, "grammar")
@@ -166,6 +167,8 @@ class AntlrLexer(
                 idContinueChars
             )
 
+            in digits -> tokenizeSequence(AntlrTokenType.Digit, digits)
+
             else -> tokenizeSingleChar(AntlrTokenType.Error, AntlrTokenChannel.Error)
         }
     }
diff --git a/src/main/kotlin/parser/AntlrParser.kt b/src/main/kotlin/parser/AntlrParser.kt
index 16b5f7b..cf9583a 100644
--- a/src/main/kotlin/parser/AntlrParser.kt
+++ b/src/main/kotlin/parser/AntlrParser.kt
@@ -97,7 +97,7 @@ class AntlrParser(
     }
 
     // rule
-    //   : 'fragment'? id ':' block ';'
+    //   : 'fragment'? id ':' block commands? ';'
     //   ;
     fun parseRule(): RuleNode {
         val fragmentToken = if (getToken().type == AntlrTokenType.Fragment) matchToken() else null
@@ -108,6 +108,12 @@ class AntlrParser(
 
         val blockNode = parseBlock(colonToken.end())
 
+        val commandsNode = if (getToken().type == AntlrTokenType.RightArrow) {
+            parseCommands()
+        } else {
+            null
+        }
+
         val semicolonToken = matchToken(AntlrTokenType.Semicolon)
 
         return RuleNode(
@@ -115,10 +121,53 @@ class AntlrParser(
             lexerIdOrParserIdToken,
             colonToken,
             blockNode,
-            semicolonToken
+            commandsNode,
+            semicolonToken,
         )
     }
 
+    // commands
+    //   : '->' command (',' command)*
+    //   ;
+    private fun parseCommands(): CommandsNode {
+        val arrowToken = matchToken(AntlrTokenType.RightArrow)
+
+        val commandNode = parseCommand()
+
+        val commaCommandChildren = buildList {
+            var nextToken = getToken()
+            while (nextToken.type == AntlrTokenType.Comma) {
+                val commaToken = matchToken()
+                add(CommandsNode.CommaCommandNode(commaToken, parseCommand()))
+                nextToken = getToken()
+            }
+        }
+
+        return CommandsNode(arrowToken, commandNode, commaCommandChildren)
+    }
+
+    // command
+    //   : id ('(' (id | digit) ')')?
+    //   ;
+    private fun parseCommand(): CommandNode {
+        val nameToken = parseId()
+
+        val paramsNode = if (getToken().type == AntlrTokenType.LeftParen) {
+            val leftParenToken = matchToken()
+            val paramToken = when (getToken().type) {
+                AntlrTokenType.LexerId, AntlrTokenType.ParserId -> parseId()
+                AntlrTokenType.Digit -> matchToken()
+                else -> emitMissingToken(tokenType = null)
+            }
+            val rightParenToken = matchToken(AntlrTokenType.RightParen)
+            CommandNode.Params(leftParenToken, paramToken, rightParenToken)
+        } else {
+            null
+        }
+
+        return CommandNode(nameToken, paramsNode)
+    }
+
     // block
     //   : alternative ('|' alternative)*
     //   ;
diff --git a/src/main/kotlin/parser/AntlrToken.kt b/src/main/kotlin/parser/AntlrToken.kt
index 047b33c..d66f6ad 100644
--- a/src/main/kotlin/parser/AntlrToken.kt
+++ b/src/main/kotlin/parser/AntlrToken.kt
@@ -68,6 +68,7 @@ enum class AntlrTokenType {
     Char,
     EscapedChar,
     UnicodeEscapedChar,
+    Digit,
 
     Empty, // Special token type needed for preserving location of empty alternatives
 }
diff --git a/src/main/kotlin/parser/AntlrTreeNodes.kt b/src/main/kotlin/parser/AntlrTreeNodes.kt
index 029f760..ba5f654 100644
--- a/src/main/kotlin/parser/AntlrTreeNodes.kt
+++ b/src/main/kotlin/parser/AntlrTreeNodes.kt
@@ -47,6 +47,7 @@ class RuleNode(
     val idToken: AntlrToken,
     val colonToken: AntlrToken,
     val blockNode: BlockNode,
+    val commandsNode: CommandsNode?,
     val semicolonToken: AntlrToken,
 ) : AntlrTreeNode() {
     override fun calculateLeftToken(): AntlrToken = idToken
@@ -58,11 +59,74 @@ class RuleNode(
         visitor.visitToken(idToken, data)
         visitor.visitToken(colonToken, data)
         visitor.visitBlockNode(blockNode, data)
+        commandsNode?.let { visitor.visitCommandsNode(it, data) }
         visitor.visitToken(semicolonToken, data)
         return null
     }
 }
 
+/** Commands attached to a rule: `'->' command (',' command)*`. */
+class CommandsNode(
+    val arrowToken: AntlrToken,
+    val commandNode: CommandNode,
+    val commaCommandNodes: List,
+) : AntlrTreeNode() {
+    /** One `',' command` pair following the first command. */
+    class CommaCommandNode(val comma: AntlrToken, val command: CommandNode) : AntlrTreeNode() {
+        override fun calculateLeftToken(): AntlrToken = comma
+
+        override fun calculateRightToken(): AntlrToken = command.rightToken
+
+        override fun acceptChildren(visitor: AntlrTreeVisitor, data: D): R? {
+            visitor.visitToken(comma, data)
+            visitor.visitCommandNode(command, data)
+            return null
+        }
+    }
+
+    override fun calculateLeftToken(): AntlrToken = arrowToken
+
+    override fun calculateRightToken(): AntlrToken = commaCommandNodes.lastOrNull()?.rightToken ?: commandNode.rightToken
+
+    override fun acceptChildren(visitor: AntlrTreeVisitor, data: D): R? {
+        visitor.visitToken(arrowToken, data)
+        // Route through visitCommandNode (as CommaCommandNode does) so overrides see the first command too.
+        visitor.visitCommandNode(commandNode, data)
+        commaCommandNodes.forEach { visitor.visitCommaCommandNode(it, data) }
+        return null
+    }
+}
+
+/** A single command: `id ('(' (id | digit) ')')?`. */
+class CommandNode(
+    val nameToken: AntlrToken,
+    val paramsNode: Params?,
+) : AntlrTreeNode() {
+    /** The parenthesized argument of a command: `'(' (id | digit) ')'`. */
+    class Params(val leftParenToken: AntlrToken, val paramToken: AntlrToken, val rightParenToken: AntlrToken) : AntlrTreeNode() {
+        override fun calculateLeftToken(): AntlrToken = leftParenToken
+
+        override fun calculateRightToken(): AntlrToken = rightParenToken
+
+        override fun acceptChildren(visitor: AntlrTreeVisitor, data: D): R? {
+            visitor.visitToken(leftParenToken, data)
+            visitor.visitToken(paramToken, data)
+            visitor.visitToken(rightParenToken, data)
+            return null
+        }
+    }
+
+    override fun calculateLeftToken(): AntlrToken = nameToken
+
+    override fun calculateRightToken(): AntlrToken = paramsNode?.rightParenToken ?: nameToken
+
+    override fun acceptChildren(visitor: AntlrTreeVisitor, data: D): R? {
+        visitor.visitToken(nameToken, data)
+        paramsNode?.let { visitor.visitTreeNode(it, data) }
+        return null
+    }
+}
+
 class ModeNode(val modeDeclaration: ModeDeclaration?, val ruleNodes: List) : AntlrTreeNode() {
     class ModeDeclaration(val modeToken: AntlrToken, val idToken: AntlrToken, val semicolonToken: AntlrToken) : AntlrTreeNode() {
         override fun calculateLeftToken(): AntlrToken = modeToken
diff --git a/src/test/kotlin/parser/AntlrParserWithLexerTests.kt b/src/test/kotlin/parser/AntlrParserWithLexerTests.kt
index 5721097..2fbef4a 100644
--- a/src/test/kotlin/parser/AntlrParserWithLexerTests.kt
+++ b/src/test/kotlin/parser/AntlrParserWithLexerTests.kt
@@ -220,4 +220,51 @@ object AntlrParserWithLexerTests {
             "~A"
         ) { it.parseElement() }
     }
+
+    @Test
+    fun commands() {
+        infrastructure.check(
+            RuleNode(
+                fragmentToken = null,
+                AntlrToken(AntlrTokenType.LexerId, value = "A"),
+                AntlrToken(AntlrTokenType.Colon),
+                BlockNode(
+                    alternativeNode = AlternativeNode(
+                        listOf(ElementNode.StringLiteralOrRange(
+                            tilde = null,
+                            ElementNode.StringLiteralOrRange.StringLiteral(
+                                AntlrToken(AntlrTokenType.Quote),
+                                listOf(AntlrToken(AntlrTokenType.Char, value = "A")),
+                                AntlrToken(AntlrTokenType.Quote)
+                            ),
+                            range = null,
+                            elementSuffix = null
+                        ))
+                    ),
+                    orAlternativeNodes = emptyList(),
+                ),
+                CommandsNode(
+                    AntlrToken(AntlrTokenType.RightArrow),
+                    commandNode = CommandNode(
+                        AntlrToken(AntlrTokenType.ParserId, value = "skip"),
+                        paramsNode = null
+                    ),
+                    commaCommandNodes = listOf(CommandsNode.CommaCommandNode(
+                        AntlrToken(AntlrTokenType.Comma),
+                        command = CommandNode(
+                            AntlrToken(AntlrTokenType.ParserId, value = "pushMode"),
+                            paramsNode = CommandNode.Params(
+                                AntlrToken(AntlrTokenType.LeftParen),
+                                AntlrToken(AntlrTokenType.LexerId, value = "DEFAULT_MODE"),
+                                AntlrToken(AntlrTokenType.RightParen)
+                            )
+                        ))
+                    ),
+                ),
+                AntlrToken(AntlrTokenType.Semicolon),
+            ),
+            "A: 'A' -> skip, pushMode(DEFAULT_MODE);") {
+            it.parseRule()
+        }
+    }
 }
\ No newline at end of file
diff --git a/src/test/kotlin/parser/ExampleData.kt b/src/test/kotlin/parser/ExampleData.kt
index b43b0c2..a1d2130 100644
--- a/src/test/kotlin/parser/ExampleData.kt
+++ b/src/test/kotlin/parser/ExampleData.kt
@@ -112,6 +112,8 @@ x
                     ),
                 ),
 
+                commandsNode = null,
+
                 AntlrToken(AntlrTokenType.Semicolon),
             ),
         )