1 // Copyright 2018 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_
6 #define ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_
7 
8 #include <memory>
9 #include <regex>
10 #include <sstream>
11 
12 #include "tree_visitor.h"
13 
14 // A TreeVisitor (see tree_visitor.h) that pretty-prints FIDL code.
15 
16 namespace fidl {
17 namespace raw {
18 
19 class ScopedBool {
20 public:
21     ScopedBool(bool& source, bool value = true)
prev_value_(source)22         : prev_value_(source), source_(source) {
23         source_ = value;
24     }
~ScopedBool()25     ~ScopedBool() {
26         source_ = prev_value_;
27     }
28 
29 private:
30     bool prev_value_;
31     bool& source_;
32 };
33 
34 class ScopedIncrement {
35 public:
ScopedIncrement(int & source)36     ScopedIncrement(int& source)
37         : source_(source) {
38         source_++;
39     }
~ScopedIncrement()40     ~ScopedIncrement() {
41         source_--;
42     }
43 
44 private:
45     int& source_;
46 };
47 
48 // A Visitor that pretty prints its AST and makes its result available via
49 // formatted_output().
50 //
51 // Implementation note: The visitor mostly does the same thing on every node,
52 // which is encapsulated in OnSourceElementShared.  Where we have overridden a
53 // particular node's visitor, it usually means that slightly different behavior
54 // is needed for that language construct.  For example, using and const
55 // declarations respect leading blank lines if they are already there, and
56 // struct, enum, and interface declarations require leading blank lines.
57 class FormattingTreeVisitor : public DeclarationOrderTreeVisitor {
58 public:
FormattingTreeVisitor()59     FormattingTreeVisitor()
60         : last_source_location_(nullptr) {}
61 
OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const & element)62     virtual void OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const& element) override {
63         OnBlankLineRequiringNode();
64         TreeVisitor::OnInterfaceDeclaration(element);
65     }
66 
OnSourceElementStart(const SourceElement & element)67     virtual void OnSourceElementStart(const SourceElement& element) override {
68         OnSourceElementShared(element.start_);
69     }
70 
OnSourceElementEnd(const SourceElement & element)71     virtual void OnSourceElementEnd(const SourceElement& element) override {
72         OnSourceElementShared(element.end_);
73     }
74 
OnAttributeList(std::unique_ptr<AttributeList> const & element)75     virtual void OnAttributeList(std::unique_ptr<AttributeList> const& element) override {
76         // Disabling these in case we're in an interface method and it thinks
77         // the next line needs to be indented more.  We don't want an indent
78         // after a newline following an attribute list.  It will be reenabled by
79         // the next visited AST node in the method.
80         newline_means_indent_more_ = false;
81         interface_method_alignment_ = false;
82         // This prevents the above from being reenabled during our walk of the
83         // AttributeList
84         ScopedBool suppress(is_member_decl_, false);
85         TreeVisitor::OnAttributeList(element);
86     }
87 
OnUsing(std::unique_ptr<Using> const & element)88     virtual void OnUsing(std::unique_ptr<Using> const& element) override {
89         OnBlankLineRespectingNode();
90         ScopedBool mem(is_member_decl_);
91         TreeVisitor::OnUsing(element);
92     }
93 
OnConstDeclaration(std::unique_ptr<ConstDeclaration> const & element)94     virtual void OnConstDeclaration(std::unique_ptr<ConstDeclaration> const& element) override {
95         OnBlankLineRespectingNode();
96         ScopedBool mem(is_member_decl_);
97         TreeVisitor::OnConstDeclaration(element);
98     }
99 
OnEnumMember(std::unique_ptr<EnumMember> const & element)100     virtual void OnEnumMember(std::unique_ptr<EnumMember> const& element) override {
101         OnBlankLineRespectingNode();
102         ScopedBool mem(is_member_decl_);
103         TreeVisitor::OnEnumMember(element);
104     }
105 
OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const & element)106     virtual void OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const& element) override {
107         OnBlankLineRequiringNode();
108         TreeVisitor::OnEnumDeclaration(element);
109     }
110 
OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const & element)111     virtual void OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const& element) override {
112         interface_method_alignment_ = true;
113         interface_method_alignment_size_ = -1;
114         next_nonws_char_is_checkpoint_ = false;
115         OnBlankLineRespectingNode();
116         ScopedBool before(blank_space_before_colon_, false);
117         ScopedBool mem(is_member_decl_);
118         ScopedBool has_ordinal(has_ordinal_, element->ordinal != nullptr);
119         TreeVisitor::OnInterfaceMethod(element);
120     }
121 
OnStructDeclaration(std::unique_ptr<StructDeclaration> const & element)122     virtual void OnStructDeclaration(std::unique_ptr<StructDeclaration> const& element) override {
123         OnBlankLineRequiringNode();
124         TreeVisitor::OnStructDeclaration(element);
125     }
126 
OnStructMember(std::unique_ptr<StructMember> const & element)127     virtual void OnStructMember(std::unique_ptr<StructMember> const& element) override {
128         OnBlankLineRespectingNode();
129         ScopedBool mem(is_member_decl_);
130         TreeVisitor::OnStructMember(element);
131     }
132 
OnTableDeclaration(std::unique_ptr<TableDeclaration> const & element)133     virtual void OnTableDeclaration(std::unique_ptr<TableDeclaration> const& element) override {
134         OnBlankLineRequiringNode();
135         TreeVisitor::OnTableDeclaration(element);
136     }
137 
OnTableMember(std::unique_ptr<TableMember> const & element)138     virtual void OnTableMember(std::unique_ptr<TableMember> const& element) override {
139         OnBlankLineRespectingNode();
140         ScopedBool mem(is_member_decl_);
141         ScopedBool before_colon(blank_space_before_colon_, false);
142         ScopedBool after_colon(blank_space_after_colon_, true);
143         TreeVisitor::OnTableMember(element);
144     }
145 
OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const & element)146     virtual void OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const& element) override {
147         OnBlankLineRequiringNode();
148         TreeVisitor::OnUnionDeclaration(element);
149     }
150 
OnUnionMember(std::unique_ptr<UnionMember> const & element)151     virtual void OnUnionMember(std::unique_ptr<UnionMember> const& element) override {
152         ScopedBool mem(is_member_decl_);
153         OnBlankLineRespectingNode();
154         TreeVisitor::OnUnionMember(element);
155     }
156 
OnType(std::unique_ptr<Type> const & element)157     virtual void OnType(std::unique_ptr<Type> const& element) override {
158         ScopedIncrement si(nested_type_depth_);
159         ScopedBool before_colon(blank_space_before_colon_, false);
160         ScopedBool after_colon(blank_space_after_colon_, false);
161         TreeVisitor::OnType(element);
162     }
163 
164     virtual void OnFile(std::unique_ptr<File> const& element) override;
165 
formatted_output()166     std::string* formatted_output() { return &formatted_output_; }
167 
168 private:
169     const char* last_source_location_;
170 
171     static constexpr int kIndentSpaces = 4;
172 
173     static constexpr char kWsCharacters[] = " \t\n\v\f\r";
174 
175     static constexpr char kWsCharactersNoNewline[] = " \t\v\f\r";
176 
177     std::string formatted_output_;
178 
179     // Indentations can be caused by nesting in the AST (e.g., if you've started
180     // a compound statement) or, in some situations, if there is a newline in
181     // the code (e.g., if you've decided to line break before the "->" operator
182     // in the middle of an interface definition).
183 
184     // What follows are adjustable properties.  We flip them from true to false
185     // at various points in the tree walk depending on what behavior we want.
186 
187     // When this is true, you get a blank line and indentation at the end of the
188     // segment.  This is true for top level decls that *require* blank lines:
189     // e.g., structs, unions, and interfaces.
190     bool blank_line_requiring_node_ = false;
191 
192     // When this is true, you get a blank line and indentation at the end of the
193     // segment if there is already a blank line there.  This is true for consts
194     // and using declarations.
195     bool blank_line_respecting_node_ = false;
196 
197     // When this is true, and a newline happens, you get an additional
198     // indentation.
199     bool newline_means_indent_more_ = false;
200 
201     // This is true in decl headers, but not after the ordinal in an interface
202     // method or in the element count for relevant types.
203     bool blank_space_before_colon_ = true;
204 
205     // This is true in decl headers and after the ordinal in an interface
206     // method, but not in the element count for relevant types.
207     bool blank_space_after_colon_ = true;
208 
209     // > characters require spaces after them unless you are in the middle of a
210     // parameterized type: arrays, vectors, handles, and requests.
211     int nested_type_depth_ = 0;
212 
213     // Interface methods have fancy alignment: if the last open parenthesis was
214     // at EOL, indentation is to the column that had beginning of method name +
215     // kIndentSpaces.  Otherwise, it is to the column with the last open
216     // parenthesis + 1.
217     bool interface_method_alignment_ = false;
218     int interface_method_alignment_size_ = -1;
219     int distance_from_last_newline_ = 0;
220     int offset_of_first_id_ = 0;
221     bool next_nonws_char_is_checkpoint_ = false;
222 
223     // When we complete a node and know the next thing needs to be whitespace
224     bool ws_required_next_ = false;
225 
226     // Some helper methods.
IsNonNewlineWS(char ch)227     static bool IsNonNewlineWS(char ch) {
228         return strchr(kWsCharactersNoNewline, ch) != nullptr;
229     }
230 
IsStartOfBlankLine(const std::string & str,int offset)231     static bool IsStartOfBlankLine(const std::string& str, int offset) {
232         for (int i = offset; i < str.size() && str[i] != '\n'; i++) {
233             if (!IsNonNewlineWS(str[i])) {
234                 return false;
235             }
236         }
237         return true;
238     }
239 
IsStartOfComment(std::string str,int i)240     static bool IsStartOfComment(std::string str, int i) {
241         return (i < str.size() - 1) && str[i] == '/' && str[i + 1] == '/';
242     }
243 
244     // If the string str at offset pos is the beginning of a comment, pos is
245     // modified to be the newline character at EOL.
MaybeWindPastComment(std::string str,int & pos)246     static void MaybeWindPastComment(std::string str, int& pos) {
247         if (IsStartOfComment(str, pos)) {
248             while (pos < str.size() && str[pos] != '\n') {
249                 pos++;
250             }
251         }
252     }
253 
254     // A "Segment" is a part of the source that we format - it extends from the
255     // end of the previously formatted AST node to the end of the first token
256     // represented in this AST node.
257     class Segment {
258     public:
Segment(std::string input,FormattingTreeVisitor * outer)259         Segment(std::string input, FormattingTreeVisitor* outer)
260             : output_(input), visitor_(outer) {}
261 
RemoveTrailingWS()262         void RemoveTrailingWS() {
263             std::string re("[");
264             re += kWsCharactersNoNewline;
265             re += "]+\n";
266             output_ = std::regex_replace(output_, std::regex(re), "\n");
267         }
268 
269         // Removes all of the whitespace at the beginning of every line.  Will
270         // add back indentation later.
RemoveLeadingWS()271         void RemoveLeadingWS() {
272             std::string re("\n[");
273             re += kWsCharactersNoNewline;
274             re += "]+";
275             output_ = std::regex_replace(output_, std::regex(re), "\n");
276         }
277 
278         void RemoveExtraBlankLines(bool respects_trailing_blankline);
279 
280         void InsertRequiredNewlines(bool is_top_level);
281 
282         // No non-' ' or '\n' whitespace
283         // One ws token before / after every ws-requiring character
284         // No non-newline ws before / after characters that don't want it.
285         void RegularizeSpaces(bool& ws_required_next);
286 
287         // Precondition: By now, everything should have had its leading ws
288         // stripped, and } characters are the first things on their own lines.
289         void Indent(int& current_nesting);
290 
GetOutput()291         std::string GetOutput() {
292             return output_;
293         }
294 
295     private:
RequiresWSBeforeChar(char ch)296         bool RequiresWSBeforeChar(char ch) {
297             return (ch == '{') ||
298                    (ch == '=') ||
299                    (visitor_->blank_space_before_colon_ && ch == ':');
300         }
301 
NoSpacesBeforeChar(char ch)302         bool NoSpacesBeforeChar(char ch) {
303             return NoWSBeforeChar(ch) ||
304                    (ch == ')') ||
305                    (ch == '?') ||
306                    (!visitor_->blank_space_before_colon_ && ch == ':') ||
307                    (visitor_->nested_type_depth_ > 0 && ch == '>');
308         }
309 
NoWSBeforeChar(char ch)310         bool NoWSBeforeChar(char ch) {
311             return (ch == ';');
312         }
313 
RequiresWSAfterChar(char ch)314         bool RequiresWSAfterChar(char ch) {
315             return (ch == '=') ||
316                    (ch == ',') ||
317                    (ch == '>' && (visitor_->nested_type_depth_ <= 1)) ||
318                    (ch == ':' && visitor_->blank_space_after_colon_);
319         }
320 
NoWSAfterChar(char ch)321         bool NoWSAfterChar(char ch) {
322             return (ch == ':' && !visitor_->blank_space_after_colon_) || (ch == '(');
323         }
324 
325         // As specified on the tin: erases multiple spaces from output_ at
326         // offset pos.  Optional |leave_this_many| parameter indicates number of
327         // spaces to leave; default is 1.  Optional |incl_newline| param
328         // indicates to erase newline characters as well as other WS.
329         // It returns the number of spaces that were deleted.
330         int EraseMultipleSpacesAt(int pos, int leave_this_many = 1, bool incl_newline = false);
331 
332         std::string output_;
333         FormattingTreeVisitor* visitor_;
334     };
335 
336     int current_nesting_ = 0;
337 
FormatAndPrintSegment(const std::string & segment)338     std::string FormatAndPrintSegment(const std::string& segment) {
339         Segment seg(segment, this);
340         seg.RemoveTrailingWS();
341         seg.RemoveLeadingWS();
342         seg.RemoveExtraBlankLines(blank_line_respecting_node_);
343         seg.RegularizeSpaces(ws_required_next_);
344         seg.InsertRequiredNewlines(blank_line_requiring_node_);
345         seg.Indent(current_nesting_);
346 
347         std::string output = seg.GetOutput();
348 
349         // We've respected prior blank lines for this decl (if it is a decl),
350         // and we should stop now.
351         blank_line_requiring_node_ = false;
352         blank_line_respecting_node_ = false;
353 
354         // If this was the start of a member decl, it was indented by
355         // kIndentSpaces.  Any other newlines inside the member decl should be
356         // indented additionally.
357         if (is_member_decl_) {
358             interface_method_alignment_ = true;
359             newline_means_indent_more_ = true;
360         }
361 
362         return output;
363     }
364 
OnBlankLineRequiringNode()365     void OnBlankLineRequiringNode() {
366         blank_line_requiring_node_ = true;
367         blank_line_respecting_node_ = true;
368     }
369 
OnBlankLineRespectingNode()370     void OnBlankLineRespectingNode() {
371         blank_line_respecting_node_ = true;
372     }
373 
374     bool is_member_decl_ = false;
375 
376     // Does the current member have an explicit ordinal?
377     bool has_ordinal_ = false;
378 
379     // str is a gap plus the next meaningful token.
380     void TrackInterfaceMethodAlignment(const std::string& str);
381 
OnSourceElementShared(const fidl::Token & current_token)382     void OnSourceElementShared(const fidl::Token& current_token) {
383         const char* ws_location = current_token.previous_end().data().data();
384         // Printed code must increase in monotonic order, for two reasons.
385         // First of all, we don't reorder anything.  Second of all, the start
386         // token for an identifier list (for example) is the same as the start
387         // token for the first identifier, so we need to make sure we don't
388         // print that token twice.
389         if (ws_location > last_source_location_) {
390             int size = (int)(current_token.data().data() - current_token.previous_end().data().data());
391             std::string gap(ws_location, size);
392             std::string content(current_token.data().data(), current_token.data().size());
393             std::string total_string = FormatAndPrintSegment(gap + content);
394             TrackInterfaceMethodAlignment(total_string);
395             formatted_output_ += total_string;
396             last_source_location_ = ws_location;
397         }
398     }
399 };
400 
401 class OrdinalRemovalVisitor : public fidl::raw::DeclarationOrderTreeVisitor {
402 public:
OrdinalRemovalVisitor()403     OrdinalRemovalVisitor() {}
OnInterfaceMethod(std::unique_ptr<fidl::raw::InterfaceMethod> const & element)404     virtual void OnInterfaceMethod(
405         std::unique_ptr<fidl::raw::InterfaceMethod> const& element) override {
406         if (element->ordinal != nullptr) {
407             const char* start = element->ordinal->start_.data().data();
408             const char* end = element->ordinal->end_.data().data() +
409                               element->ordinal->end_.data().size();
410             for (char* ptr = const_cast<char*>(start); ptr < end; ptr++) {
411                 // Don't erase comments in the middle of ordinals;
412                 if (strncmp("//", ptr, 2) == 0) {
413                     while (*ptr != '\n' && ptr < end) {
414                         ptr++;
415                     }
416                     continue;
417                 }
418                 *ptr = ' ';
419             }
420 
421             // Incorporate the whitespace that came before the ordinal into the
422             // whitespace before the identifier.
423             element->identifier->start_.set_previous_end(
424                 element->ordinal->start_.previous_end());
425             element->identifier->end_.set_previous_end(
426                 element->ordinal->start_.previous_end());
427 
428             // If there are no attributes, the identifier is now the start of
429             // the method.
430             if (element->attributes == nullptr)
431                 element->start_ = element->identifier->start_;
432 
433             element->ordinal.reset(nullptr);
434         }
435         DeclarationOrderTreeVisitor::OnInterfaceMethod(element);
436     }
437 };
438 
439 } // namespace raw
440 } // namespace fidl
441 
442 #endif // ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_
443