forked from clark800/lambda-zero
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scan.c
128 lines (107 loc) · 3.54 KB
/
scan.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "scan.h"
// Start of the first buffer ever handed to getFirstLexeme; the code visible
// here assigns it only while it is still NULL. getLexemeByLocation resolves
// negative locations against this buffer.
const char* INTERNAL_CODE = NULL;
// Start of the buffer most recently handed to getFirstLexeme (overwritten on
// every call). Positive locations and getPosition are resolved against it.
const char* SOURCE_CODE = NULL;
// Reports whether c is horizontal whitespace: a space, a tab or a carriage
// return. Newlines are deliberately excluded.
bool isSpaceCharacter(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
            return true;
        default:
            return false;
    }
}
// Reports whether c opens (or closes) a quoted lexeme: a double quote or a
// single quote.
bool isQuoteCharacter(char c) {
    if (c == '\'')
        return true;
    return c == '"';
}
// Reports whether c is a delimiter: the string terminator, a space, a
// newline, or one of  , ; ` ( ) [ ] { }
bool isDelimiterCharacter(char c) {
    // the terminator is handled explicitly rather than relying on strchr
    // matching the end of the delimiter list
    if (c == '\0')
        return true;
    const char* match = strchr(" \n,;`()[]{}", c);
    return match != NULL;
}
// Reports whether c may appear in an operand: an ASCII letter or digit, an
// underscore, or a single quote.
bool isOperandCharacter(char c) {
    // anything that is not positive is outside ASCII (or is the terminator)
    if (c <= 0)
        return false;
    if (c == '_' || c == '\'')
        return true;
    return isalnum(c) != 0;
}
// Reports whether c may appear in an operator: a positive (ASCII) character
// that is not a delimiter, operand character or quote, and is not one of
// the excluded symbols { } ; @ $
bool isOperatorCharacter(char c) {
    // anything that is not positive is outside ASCII (or is the terminator)
    if (c <= 0)
        return false;
    if (isDelimiterCharacter(c) || isOperandCharacter(c) || isQuoteCharacter(c))
        return false;
    return strchr("{};@$", c) == NULL;
}
// Reports whether s begins with the line-comment opener "//".
static inline bool isLineComment(const char* s) {
    // s[1] is only inspected once s[0] is known not to be the terminator
    if (s[0] != '/')
        return false;
    return s[1] == '/';
}
// Reports whether s begins with the block-comment opener "/*".
static inline bool isBlockComment(const char* s) {
    // s[1] is only inspected once s[0] is known not to be the terminator
    if (s[0] != '/')
        return false;
    return s[1] == '*';
}
// Predicate for skipWhile: true for every character except a newline.
static inline bool isNotNewline(char c) {
    const bool isNewline = (c == '\n');
    return !isNewline;
}
// Advances s past the longest prefix whose characters all satisfy predicate.
// Never moves beyond the string terminator; the predicate is not called on it.
// Returns a pointer to the first character that fails the predicate, or to
// the terminator if every character passed.
static inline const char* skipWhile(const char* s, bool (*predicate)(char)) {
    for (; *s != '\0'; ++s) {
        if (!predicate(*s))
            break;
    }
    return s;
}
// Skips a block comment. s is expected to point at the opening "/*".
// Returns a pointer just past the closing "*/", or a pointer to the string
// terminator when the comment is never closed.
static inline const char* skipBlockComment(const char* s) {
    // Step over the opener first so that its '*' cannot be paired with a
    // following '/' -- otherwise "/*/" would be taken for a complete comment.
    if (s[0] == '/' && s[1] == '*')
        s += 2;
    // s[1] is only read when s[0] is '*', so it never reads past the terminator
    while (s[0] != '\0' && (s[0] != '*' || s[1] != '/'))
        s++;
    return s[0] == '\0' ? s : s + 2;
}
// Skips any run of back-to-back comments starting at s and returns a pointer
// to the first position that does not open a comment. A line comment is
// skipped up to (not including) its newline, so nothing after that newline
// is consumed by this function.
static inline const char* skipComments(const char* s) {
    for (;;) {
        if (isLineComment(s))
            s = skipWhile(s, isNotNewline);
        else if (isBlockComment(s))
            s = skipBlockComment(s);
        else
            return s;
    }
}
// Skips a quoted lexeme. s must point at the opening quotation mark; the
// same character closes the quote. A backslash escapes the character that
// follows it (unless that character is the string terminator).
// Returns a pointer just past the closing quote, or a pointer to the newline
// or terminator at which an unclosed quote was abandoned.
static inline const char* skipQuote(const char* s) {
    const char opening = *s;
    const char* cursor = s + 1;
    while (*cursor != '\0' && *cursor != '\n') {
        if (*cursor == '\\' && cursor[1] != '\0') {
            cursor += 2; // the backslash and the character it escapes
            continue;
        }
        if (*cursor == opening)
            return cursor + 1;
        cursor++;
    }
    return cursor;
}
// Returns a pointer just past the lexeme that starts at `lexeme`, which must
// not be the string terminator. The first character decides the lexeme's
// kind: a run of spaces, a quoted string, a run of operand characters, or a
// run of operator characters. Anything else (a delimiter or an illegal
// character) is a lexeme of length one.
static inline const char* skipLexeme(const char* lexeme) {
    assert(lexeme[0] != '\0');
    const char first = lexeme[0];
    bool (*sameKind)(char) = NULL;
    // the order matters: a single quote is also an operand character, but a
    // leading one must start a quoted lexeme
    if (isSpaceCharacter(first))
        sameKind = isSpaceCharacter;
    else if (isQuoteCharacter(first))
        return skipQuote(lexeme);
    else if (isOperandCharacter(first))
        sameKind = isOperandCharacter;
    else if (isOperatorCharacter(first))
        sameKind = isOperatorCharacter;
    if (sameKind == NULL)
        return lexeme + 1; // delimiter or illegal character
    return skipWhile(lexeme, sameKind);
}
// Registers `input` as the current source buffer and returns its first
// lexeme, i.e. the first position after any leading comments.
// The very first buffer passed in is also remembered as INTERNAL_CODE.
const char* getFirstLexeme(const char* input) {
    SOURCE_CODE = input;
    if (!INTERNAL_CODE)
        INTERNAL_CODE = input;
    return skipComments(input);
}
// Returns the lexeme following lastLexeme: steps over lastLexeme itself and
// then over any comments that come directly after it.
const char* getNextLexeme(const char* lastLexeme) {
    const char* afterLexeme = skipLexeme(lastLexeme);
    return skipComments(afterLexeme);
}
// Returns the number of characters in the lexeme starting at `lexeme`.
// The string terminator counts as a lexeme of length one.
unsigned int getLexemeLength(const char* lexeme) {
    if (lexeme[0] == '\0')
        return 1;
    return (unsigned int)(skipLexeme(lexeme) - lexeme);
}
// Reports whether the lexemes starting at a and b are textually identical:
// same length and the same characters over that length.
bool isSameLexeme(const char* a, const char* b) {
    const unsigned int length = getLexemeLength(a);
    if (length != getLexemeLength(b))
        return false;
    return strncmp(a, b, length) == 0;
}
// Converts a lexeme pointer into a 1-based location: its character offset
// from the start of SOURCE_CODE, plus one.
int getLexemeLocation(const char* lexeme) {
    return (int)(1 + (lexeme - SOURCE_CODE));
}
// Converts a location back into a lexeme pointer.
// Location 0 yields an empty string; a negative location indexes into
// INTERNAL_CODE and a positive one into SOURCE_CODE, in both cases using the
// 1-based magnitude of the location.
const char* getLexemeByLocation(int location) {
    if (location == 0)
        return "\0";
    const char* base = SOURCE_CODE;
    if (location < 0)
        base = INTERNAL_CODE;
    return base + (abs(location) - 1);
}
// Translates a 1-based location in SOURCE_CODE into a 1-based line/column
// pair by walking every character that precedes the location.
// Locations 0 and 1 both map to line 1, column 1.
Position getPosition(unsigned int location) {
    Position position = {1, 1}; // use 1-based indexing
    // Compare i + 1 < location rather than i < location - 1: location is
    // unsigned, so location - 1 would wrap to UINT_MAX for location 0 and the
    // loop would read far beyond the end of the source buffer.
    for (unsigned int i = 0; i + 1 < location; i++) {
        position.column += 1;
        if (SOURCE_CODE[i] == '\n') {
            // a newline moves to the start of the next line
            position.line += 1;
            position.column = 1;
        }
    }
    return position;
}