1 // Copyright 2018 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // The implementation for the FormattingTreeVisitor that pretty-prints FIDL code.
6 #include <locale>
7 #include <map>
8 #include <regex>
9 #include <set>
10 #include <string>
11 
12 #include "fidl/formatter.h"
13 
14 namespace fidl {
15 namespace raw {
16 
17 // Rules:
18 //   No more than one blank line in a row.
19 //   Keep blank lines before and after comments.
20 //   Will add newlines before top level declarations later.
RemoveExtraBlankLines(bool respects_trailing_blankline)21 void FormattingTreeVisitor::Segment::RemoveExtraBlankLines(bool respects_trailing_blankline) {
22     std::set<int> blank_line_indices;
23     std::set<int> comment_line_indices;
24     std::map<int, int> line_offsets;
25 
26     // First, find all of the blank lines and comment lines.
27     int line_num = 0;
28     line_offsets[line_num++] = 0;
29     for (int i = 0; i < output_.size(); i++) {
30         if (output_[i] == '\n' && i + 1 != output_.size()) {
31             line_offsets[line_num] = i + 1;
32             bool is_blank = true;
33             bool is_comment = false;
34             for (int j = i + 1;
35                  j < output_.size() && output_[j] != '\n'; j++) {
36                 if (!IsNonNewlineWS(output_[j])) {
37                     is_blank = false;
38                 }
39                 if (IsStartOfComment(output_, j)) {
40                     is_comment = true;
41                 }
42             }
43             if (is_blank) {
44                 blank_line_indices.insert(line_num);
45             }
46             if (is_comment) {
47                 comment_line_indices.insert(line_num);
48             }
49             line_num++;
50         }
51     }
52 
53     int last_line_num = line_num - 1;
54 
55     // Next, get rid of any blank line that isn't next to a comment or,
56     // if we are respecting trailing blank lines, right before the end.
57     // Make an exception if the previous line is blank - i.e., coalesce
58     // multiple blank lines.  Work backwards so we don't screw up line
59     // numbers.
60     for (auto i = blank_line_indices.rbegin();
61          i != blank_line_indices.rend();
62          ++i) {
63         int line_num = *i;
64         bool is_next_to_comment =
65             comment_line_indices.count(line_num - 1) != 0 ||
66             comment_line_indices.count(line_num + 1) != 0;
67         bool need_to_preserve_because_last =
68             respects_trailing_blankline &&
69             line_num == last_line_num - 1;
70         bool need_to_coalesce =
71             blank_line_indices.count(line_num + 1) != 0;
72         if ((!is_next_to_comment && !need_to_preserve_because_last) ||
73             need_to_coalesce) {
74             int offset = line_offsets[line_num];
75             size_t next_newline = output_.find_first_of("\n", offset);
76             if (next_newline == std::string::npos) {
77                 next_newline = output_.size();
78             }
79             output_.erase(offset, next_newline - offset + 1);
80         }
81     }
82 }
83 
84 // Assumptions: Leading WS has been stripped.
85 // Rules:
86 //  - newlines after ';', '{' (unless before a comment)
87 //  - newlines before top-level decls (unless after a comment).
InsertRequiredNewlines(bool is_top_level)88 void FormattingTreeVisitor::Segment::InsertRequiredNewlines(bool is_top_level) {
89     // Insert lines after ';' and '{', if not already present
90     for (int i = 0; i < output_.size(); i++) {
91         MaybeWindPastComment(output_, i);
92         char ch = output_[i];
93         if (ch == ';' || ch == '{') {
94             if (i == output_.size() - 1) {
95                 output_.append("\n");
96             } else {
97                 size_t j = output_.find_first_not_of(kWsCharactersNoNewline, i + 1);
98                 // Unless the next thing is a comment.
99                 if (j != std::string::npos && !IsStartOfComment(output_, j)) {
100                     // Make the next thing a newline.
101                     if (IsNonNewlineWS(output_[i + 1])) {
102                         output_[i + 1] = '\n';
103                     } else if (output_[i + 1] != '\n') {
104                         output_.insert(i + 1, "\n");
105                     }
106                 }
107             }
108         }
109     }
110 
111     // Insert lines before top level decls.
112     if (is_top_level) {
113         // Right before the last word in this string, we need a blank line,
114         // followed by some (possibly zero) number of comment lines.  So we
115         // break the string into lines, and then work backwards.
116 
117         std::stringstream ss(output_);
118         std::string tmp;
119         std::vector<std::string> lines;
120 
121         while (std::getline(ss, tmp, '\n')) {
122             lines.push_back(tmp);
123         }
124         std::string terminal = lines.back();
125         lines.pop_back();
126         if (lines.size() == 1) {
127             lines[0].append("\n");
128         } else {
129             // From the end of the list of lines, find the first line
130             // that isn't a comment, and insert a blank line (if it
131             // isn't already blank).
132             int i = lines.size() - 1;
133             while (i >= 0 && IsStartOfComment(lines[i], 0)) {
134                 i--;
135             }
136 
137             if (!IsStartOfBlankLine(lines[i], 0)) {
138                 lines.insert(lines.begin() + i + 1, "");
139             }
140         }
141         output_ = "";
142         for (auto line : lines) {
143             output_ += line + "\n";
144         }
145         output_ += terminal;
146     }
147 }
148 
EraseMultipleSpacesAt(int pos,int leave_this_many,bool incl_newline)149 int FormattingTreeVisitor::Segment::EraseMultipleSpacesAt(int pos, int leave_this_many, bool incl_newline) {
150     std::function<bool(char)> is_ws;
151     if (incl_newline) {
152         is_ws = [](char ch) { return isspace(ch); };
153     } else {
154         is_ws = [](char ch) { return IsNonNewlineWS(ch); };
155     }
156     if (!is_ws(output_[pos])) {
157         return 0;
158     }
159     int length_of_spaces = 0;
160 
161     int start_pos = pos;
162     int end_pos = pos;
163     while (start_pos > 0 && is_ws(output_[start_pos - 1])) {
164         start_pos--;
165     }
166 
167     // int_size - 2 can be negative, and output_.size() is unsigned,
168     // cast to make the comparison work.
169     int int_size = static_cast<int>(output_.size());
170     while (end_pos <= int_size - 2 &&
171            is_ws(output_[end_pos + 1])) {
172         end_pos++;
173     }
174 
175     length_of_spaces = end_pos - start_pos + 1;
176     int num_deleted_spaces =
177         std::max(length_of_spaces - leave_this_many, 0);
178     output_.erase(start_pos, num_deleted_spaces);
179     return num_deleted_spaces;
180 }
181 
182 // Assumption: Trailing WS has been stripped, spaces have been changed to ' '
183 // Rules:
184 //  - No non-' ' or '\n' whitespace
185 //  - One ws token before / after every ws-requiring character
186 //  - No non-newline ws before / after characters that don't want it.
187 //  - "->" operators are never at the end of the line.
RegularizeSpaces(bool & ws_required_next)188 void FormattingTreeVisitor::Segment::RegularizeSpaces(bool& ws_required_next) {
189     bool last_char_required_ws = false;
190 
191     // Check if this is still true from the last node.
192     if (ws_required_next) {
193         output_.insert(0, " ");
194         ws_required_next = false;
195     }
196 
197     for (int i = 0; i < output_.size(); i++) {
198         // If it is a comment, jump to EOL.
199         MaybeWindPastComment(output_, i);
200 
201         // If we see "->\n", change it to "\n->".
202         const char arrow_nl[] = "->\n";
203         if (output_.compare(i, strlen(arrow_nl), arrow_nl) == 0) {
204             output_.replace(i, strlen(arrow_nl), "\n->");
205             i -= EraseMultipleSpacesAt(i - 1, 0);
206         }
207 
208         // Erase multiple spaces
209         EraseMultipleSpacesAt(i);
210 
211         // Ensure whitespace around certain characters
212         if (RequiresWSBeforeChar(output_[i])) {
213             if (i == 0 || !isspace(output_[i - 1])) {
214                 output_.insert(i, " ");
215                 i++;
216             }
217         }
218 
219         // This is a little weird.  '(' requires ws if it follows an
220         // arrow, but not if it follows a method name.  Both of these
221         // are in interface method definitions, so this ends up being
222         // slightly easier than having it positionally defined during
223         // AST traversal.
224         if (output_[i] == '(') {
225             if (!last_char_required_ws && i > 0) {
226                 i -= EraseMultipleSpacesAt(i - 1, 0);
227             }
228         }
229 
230         // Ensure no whitespace around other characters
231         if (NoSpacesBeforeChar(output_[i])) {
232             if (i > 0) {
233                 i -= EraseMultipleSpacesAt(i - 1, 0, NoWSBeforeChar(output_[i]));
234             }
235         }
236 
237         // We don't want whitespace after these characters... unless there is a
238         // comment after the WS.
239         int j;
240         for (j = i + 1;
241              j < output_.size() && IsNonNewlineWS(output_[j]);
242              j++)
243             ;
244         if (NoWSAfterChar(output_[i]) &&
245             !IsStartOfComment(output_, j)) {
246             EraseMultipleSpacesAt(i + 1, 0);
247         }
248 
249         // The following clause is figuring out whether the next
250         // iteration requires ws, so we need to keep it past anything
251         // that uses that information in the loop.
252         if (RequiresWSAfterChar(output_[i])) {
253             if (i != output_.size() - 1 && !isspace(output_[i + 1])) {
254                 output_.insert(i + 1, " ");
255                 i++;
256             }
257             last_char_required_ws = true;
258         } else {
259             if (!isspace(output_[i])) {
260                 last_char_required_ws = false;
261             }
262         }
263     }
264     ws_required_next = last_char_required_ws;
265 }
266 
267 // Rules are mostly obvious, but see TrackMethodInterfaceAlignment below.
268 // Precondition: By now, everything should have had its leading ws
269 // stripped, and } characters are the first things on their own lines.
Indent(int & current_nesting)270 void FormattingTreeVisitor::Segment::Indent(int& current_nesting) {
271     for (int i = 0; i < output_.size(); i++) {
272         if (output_[i] == '\n') {
273             // Don't indent a blank line.
274             if (output_[i + 1] == '\n') {
275                 continue;
276             }
277             // If this is an outdent line, do that.
278             if (output_[i + 1] == '}') {
279                 current_nesting--;
280             }
281             int indent = current_nesting * kIndentSpaces;
282             if (visitor_->newline_means_indent_more_) {
283                 if (visitor_->interface_method_alignment_ && visitor_->interface_method_alignment_size_ > -1) {
284                     indent = visitor_->interface_method_alignment_size_;
285                 } else {
286                     indent += kIndentSpaces;
287                 }
288             }
289             output_.insert(i + 1, indent, ' ');
290         }
291 
292         int pos = i;
293         // Skip comments at this point, because we don't want to
294         // increase nesting based on a '{' character in a comment. :)
295         MaybeWindPastComment(output_, pos);
296 
297         // 1 less than pos because i will be incremented on the next
298         // iteration.  But that means it is a real character, so we need
299         // to skip testing that character to see if it changes the
300         // nesting level.
301         if (pos != i) {
302             i = pos - 1;
303             continue;
304         }
305 
306         if (output_[i] == '{') {
307             current_nesting++;
308         }
309         if (output_[i] == ')') {
310             visitor_->interface_method_alignment_size_ = visitor_->offset_of_first_id_;
311         }
312         if (output_[i] == ';') {
313             visitor_->interface_method_alignment_size_ = -1;
314             visitor_->interface_method_alignment_ = false;
315             visitor_->newline_means_indent_more_ = false;
316         }
317     }
318 }
319 
320 // The purpose of this method is to figure out what the indentation will be if
321 // we encounter a newline.  The rule is :
322 //  - If there isn't a parameter on the same line after the '(' character, +1
323 //    indent past the beginning of the method name.
324 //  - If there is a parameter on the same line after the '(' character,
325 //    align at the same vertical column as that parameter.
TrackInterfaceMethodAlignment(const std::string & str)326 void FormattingTreeVisitor::TrackInterfaceMethodAlignment(const std::string& str) {
327     static std::locale c_locale("C");
328     if (interface_method_alignment_) {
329         for (int i = 0; i < str.size(); i++) {
330             MaybeWindPastComment(str, i);
331 
332             char ch = str[i];
333             if (ch == '\n') {
334                 distance_from_last_newline_ = 0;
335             } else {
336                 distance_from_last_newline_++;
337             }
338 
339             // This figures out if we are supposed to align to the '(' or the
340             // method name.
341             if (ch == '(') {
342                 bool align_on_oparen = false;
343                 for (int j = i + 1; j < str.size(); j++) {
344                     MaybeWindPastComment(str, j);
345                     if (str[j] == '\n')
346                         break;
347                     if (!IsNonNewlineWS(str[j]))
348                         align_on_oparen = true;
349                 }
350                 if (align_on_oparen) {
351                     interface_method_alignment_size_ = distance_from_last_newline_;
352                 }
353             }
354 
355             if (!has_ordinal_) {
356                 if (isalpha(ch, c_locale) &&
357                     interface_method_alignment_size_ == -1) {
358                     // This should be the method identifier.
359                     offset_of_first_id_ =
360                         interface_method_alignment_size_ =
361                             distance_from_last_newline_ + kIndentSpaces - 1;
362                 }
363             } else {
364                 // TODO(FIDL-372): remove this branch.  It is only relevant when
365                 // there are explicit ordinals.
366                 // This tracks the distance from the beginning of the method
367                 // name, in case we need it (i.e., in case we don't indent to
368                 // the '(' character.
369                 if (!isspace(ch) && next_nonws_char_is_checkpoint_) {
370                     offset_of_first_id_ =
371                         interface_method_alignment_size_ =
372                             distance_from_last_newline_ + kIndentSpaces - 1;
373                     next_nonws_char_is_checkpoint_ = false;
374                 }
375                 if (str[i] == ':' && interface_method_alignment_size_ == -1) {
376                     // The first ':' we see - means it is the gap after the
377                     // ordinal.  The next thing we see is the method name, so
378                     // that might become the indentation level.
379                     next_nonws_char_is_checkpoint_ = true;
380                 }
381             }
382         }
383     }
384 }
385 
OnFile(std::unique_ptr<fidl::raw::File> const & element)386 void FormattingTreeVisitor::OnFile(std::unique_ptr<fidl::raw::File> const& element) {
387     // Eat ws at the beginning of the file.
388     fidl::Token real_start = element->start_;
389     fidl::StringView start_view = real_start.previous_end().data();
390     const char* start_ptr = start_view.data();
391     int initial_length = start_view.size();
392     size_t offset = strspn(start_ptr, kWsCharacters);
393     fidl::StringView processed_file_start(
394         start_ptr + offset, initial_length - offset);
395     element->start_.set_previous_end(
396         fidl::SourceLocation(processed_file_start, real_start.previous_end().source_file()));
397 
398     fidl::raw::DeclarationOrderTreeVisitor::OnFile(element);
399 }
400 
401 } // namespace raw
402 } // namespace fidl
403