1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "fidl/lexer.h"
6
7 #include <assert.h>
8 #include <map>
9
10 namespace fidl {
11
12 namespace {
13
// Returns true when |c| may appear inside an identifier: an ASCII letter
// (either case), a decimal digit, or an underscore.
//
// The range comparisons assume an ASCII-compatible execution character set,
// in which each run of letters is contiguous.
bool IsIdentifierBody(char c) {
    const bool is_lower = c >= 'a' && c <= 'z';
    const bool is_upper = c >= 'A' && c <= 'Z';
    const bool is_digit = c >= '0' && c <= '9';
    if (is_lower || is_upper || is_digit) {
        return true;
    }
    return c == '_';
}
84
85 // IsIdentifierValid disallows identifiers (escaped, and unescaped) from
86 // starting or ending with underscore.
IsIdentifierValid(StringView source_data)87 bool IsIdentifierValid(StringView source_data) {
88 return source_data[0] != '_' && source_data[source_data.size() - 1] != '_';
89 }
90
// Returns true when |c| may appear inside a numeric literal token: a decimal
// digit, a hexadecimal letter (a-f / A-F), the radix marker 'x' / 'X', or one
// of '-', '_' and '.'.
//
// This is a purely per-character test; it does not check that the characters
// together form a well-formed number.
bool IsNumericLiteralBody(char c) {
    const bool is_digit = c >= '0' && c <= '9';
    const bool is_hex_letter = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    if (is_digit || is_hex_letter) {
        return true;
    }
    return c == 'x' || c == 'X' || c == '-' || c == '_' || c == '.';
}
125
126 } // namespace
127
Peek() const128 constexpr char Lexer::Peek() const {
129 return *current_;
130 }
131
Skip()132 void Lexer::Skip() {
133 ++current_;
134 ++token_start_;
135 }
136
Consume()137 char Lexer::Consume() {
138 auto current = *current_;
139 ++current_;
140 ++token_size_;
141 return current;
142 }
143
Reset(Token::Kind kind)144 StringView Lexer::Reset(Token::Kind kind) {
145 auto data = StringView(token_start_, token_size_);
146 if (kind != Token::Kind::kComment) {
147 previous_end_ = token_start_ + token_size_;
148 }
149 token_start_ = current_;
150 token_size_ = 0u;
151 return data;
152 }
153
Finish(Token::Kind kind)154 Token Lexer::Finish(Token::Kind kind) {
155 assert(kind != Token::Kind::kIdentifier);
156 StringView previous(previous_end_, token_start_ - previous_end_);
157 SourceLocation previous_location(previous, source_file_);
158 return Token(previous_location,
159 SourceLocation(Reset(kind), source_file_), kind, Token::Subkind::kNone);
160 }
161
LexEndOfStream()162 Token Lexer::LexEndOfStream() {
163 return Finish(Token::Kind::kEndOfFile);
164 }
165
LexNumericLiteral()166 Token Lexer::LexNumericLiteral() {
167 while (IsNumericLiteralBody(Peek()))
168 Consume();
169 return Finish(Token::Kind::kNumericLiteral);
170 }
171
LexIdentifier()172 Token Lexer::LexIdentifier() {
173 while (IsIdentifierBody(Peek()))
174 Consume();
175 StringView previous(previous_end_, token_start_ - previous_end_);
176 SourceLocation previous_end(previous, source_file_);
177 StringView identifier_data = Reset(Token::Kind::kIdentifier);
178 if (!IsIdentifierValid(identifier_data)) {
179 SourceLocation location(StringView(token_start_, token_size_), source_file_);
180 std::string msg("invalid identifier '");
181 msg.append(identifier_data);
182 msg.append("'");
183 error_reporter_->ReportError(location, msg);
184 }
185 auto subkind = Token::Subkind::kNone;
186 auto lookup = keyword_table_.find(identifier_data);
187 if (lookup != keyword_table_.end())
188 subkind = lookup->second;
189 return Token(previous_end, SourceLocation(identifier_data, source_file_),
190 Token::Kind::kIdentifier, subkind);
191 }
192
LexStringLiteral()193 Token Lexer::LexStringLiteral() {
194 auto last = Peek();
195
196 // Lexing a "string literal" to the next matching delimiter.
197 for (;;) {
198 auto next = Consume();
199 switch (next) {
200 case 0:
201 return LexEndOfStream();
202 case '"':
203 // This escaping logic is incorrect for the input: "\\"
204 if (last != '\\')
205 return Finish(Token::Kind::kStringLiteral);
206 // Fall through.
207 default:
208 last = next;
209 }
210 }
211 }
212
LexCommentOrDocComment()213 Token Lexer::LexCommentOrDocComment() {
214 // Consume the second /.
215 assert(Peek() == '/');
216 Consume();
217
218 // Check if it's a Doc Comment
219 auto comment_type = Token::Kind::kComment;
220 if (Peek() == '/') {
221 comment_type = Token::Kind::kDocComment;
222 Consume();
223 // Anything with more than 3 slashes is a likely a section
224 // break comment
225 if (Peek() == '/') {
226 comment_type = Token::Kind::kComment;
227 }
228 }
229
230 // Lexing a C++-style // comment. Go to the end of the line or
231 // file.
232 for (;;) {
233 switch (Peek()) {
234 case 0:
235 case '\n':
236 return Finish(comment_type);
237 default:
238 Consume();
239 continue;
240 }
241 }
242 }
243
SkipWhitespace()244 void Lexer::SkipWhitespace() {
245 for (;;) {
246 switch (Peek()) {
247 case ' ':
248 case '\n':
249 case '\r':
250 case '\t':
251 Skip();
252 continue;
253 default:
254 return;
255 }
256 }
257 }
258
LexNoComments()259 Token Lexer::LexNoComments() {
260 for (;;) {
261 auto token = Lex();
262 if (token.kind() == Token::Kind::kComment)
263 continue;
264 return token;
265 }
266 }
267
Lex()268 Token Lexer::Lex() {
269 do {
270 SkipWhitespace();
271
272 switch (Consume()) {
273 case 0:
274 return LexEndOfStream();
275
276 case ' ':
277 case '\n':
278 case '\r':
279 case '\t':
280 assert(false && "Should have been handled by SkipWhitespace!");
281
282 case '-':
283 // Maybe the start of an arrow.
284 if (Peek() == '>') {
285 Consume();
286 return Finish(Token::Kind::kArrow);
287 }
288 // Fallthrough
289 case '0':
290 case '1':
291 case '2':
292 case '3':
293 case '4':
294 case '5':
295 case '6':
296 case '7':
297 case '8':
298 case '9':
299 return LexNumericLiteral();
300
301 case 'a':
302 case 'A':
303 case 'b':
304 case 'B':
305 case 'c':
306 case 'C':
307 case 'd':
308 case 'D':
309 case 'e':
310 case 'E':
311 case 'f':
312 case 'F':
313 case 'g':
314 case 'G':
315 case 'h':
316 case 'H':
317 case 'i':
318 case 'I':
319 case 'j':
320 case 'J':
321 case 'k':
322 case 'K':
323 case 'l':
324 case 'L':
325 case 'm':
326 case 'M':
327 case 'n':
328 case 'N':
329 case 'o':
330 case 'O':
331 case 'p':
332 case 'P':
333 case 'q':
334 case 'Q':
335 case 'r':
336 case 'R':
337 case 's':
338 case 'S':
339 case 't':
340 case 'T':
341 case 'u':
342 case 'U':
343 case 'v':
344 case 'V':
345 case 'w':
346 case 'W':
347 case 'x':
348 case 'X':
349 case 'y':
350 case 'Y':
351 case 'z':
352 case 'Z':
353 return LexIdentifier();
354
355 case '"':
356 return LexStringLiteral();
357
358 case '/':
359 // Maybe the start of a comment.
360 switch (Peek()) {
361 case '/':
362 return LexCommentOrDocComment();
363 default: {
364 SourceLocation location(StringView(token_start_, token_size_), source_file_);
365 std::string msg("invalid character '");
366 msg.append(location.data());
367 msg.append("'");
368 error_reporter_->ReportError(location, msg);
369 continue;
370 }
371 } // switch
372
373 case '(':
374 return Finish(Token::Kind::kLeftParen);
375 case ')':
376 return Finish(Token::Kind::kRightParen);
377 case '[':
378 return Finish(Token::Kind::kLeftSquare);
379 case ']':
380 return Finish(Token::Kind::kRightSquare);
381 case '{':
382 return Finish(Token::Kind::kLeftCurly);
383 case '}':
384 return Finish(Token::Kind::kRightCurly);
385 case '<':
386 return Finish(Token::Kind::kLeftAngle);
387 case '>':
388 return Finish(Token::Kind::kRightAngle);
389
390 case '.':
391 return Finish(Token::Kind::kDot);
392 case ',':
393 return Finish(Token::Kind::kComma);
394 case ';':
395 return Finish(Token::Kind::kSemicolon);
396 case ':':
397 return Finish(Token::Kind::kColon);
398 case '?':
399 return Finish(Token::Kind::kQuestion);
400 case '=':
401 return Finish(Token::Kind::kEqual);
402 case '&':
403 return Finish(Token::Kind::kAmpersand);
404
405 default: {
406 SourceLocation location(StringView(token_start_, token_size_), source_file_);
407 std::string msg("invalid character '");
408 msg.append(location.data());
409 msg.append("'");
410 error_reporter_->ReportError(location, msg);
411 continue;
412 }
413 } // switch
414 } while (true);
415 }
416
417 } // namespace fidl
418