1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_LEXER_H_
6 #define ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_LEXER_H_
7 
8 #include <assert.h>
9 #include <map>
10 #include <stdint.h>
11 
12 #include "error_reporter.h"
13 #include "source_manager.h"
14 #include "string_view.h"
15 #include "token.h"
16 
17 namespace fidl {
18 
// The lexer does not own the data it operates on. It merely holds a
// reference to a SourceFile and produces a stream of tokens from that
// file's contents, possibly failing partway through.
22 class Lexer {
23 public:
24     // The Lexer assumes the final character is 0. This substantially
25     // simplifies advancing to the next character.
Lexer(const SourceFile & source_file,ErrorReporter * error_reporter)26     Lexer(const SourceFile& source_file, ErrorReporter* error_reporter)
27         : source_file_(source_file), error_reporter_(error_reporter) {
28         assert(data()[data().size() - 1] == 0);
29         keyword_table_ = {
30 #define KEYWORD(Name, Spelling) {Spelling, Token::Subkind::k##Name},
31 #include "fidl/token_definitions.inc"
32 #undef KEYWORD
33         };
34         current_ = data().data();
35         previous_end_ = token_start_ = current_;
36     }
37 
38     Token Lex();
39     Token LexNoComments();
40 
41 private:
data()42     StringView data() { return source_file_.data(); }
43 
44     constexpr char Peek() const;
45     void Skip();
46     char Consume();
47     StringView Reset(Token::Kind kind);
48     Token Finish(Token::Kind kind);
49 
50     void SkipWhitespace();
51 
52     Token LexEndOfStream();
53     Token LexNumericLiteral();
54     Token LexIdentifier();
55     Token LexEscapedIdentifier();
56     Token LexStringLiteral();
57     Token LexCommentOrDocComment();
58 
59     const SourceFile& source_file_;
60     std::map<StringView, Token::Subkind> keyword_table_;
61     ErrorReporter* error_reporter_;
62 
63     const char* current_ = nullptr;
64     const char* token_start_ = nullptr;
65     const char* previous_end_ = nullptr;
66     size_t token_size_ = 0u;
67 };
68 
69 } // namespace fidl
70 
71 #endif // ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_LEXER_H_
72