1 // Copyright 2018 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The implementation for the FormattingTreeVisitor that pretty-prints FIDL code.
6 #include <locale>
7 #include <map>
8 #include <regex>
9 #include <set>
10 #include <string>
11
12 #include "fidl/formatter.h"
13
14 namespace fidl {
15 namespace raw {
16
17 // Rules:
18 // No more than one blank line in a row.
19 // Keep blank lines before and after comments.
20 // Will add newlines before top level declarations later.
RemoveExtraBlankLines(bool respects_trailing_blankline)21 void FormattingTreeVisitor::Segment::RemoveExtraBlankLines(bool respects_trailing_blankline) {
22 std::set<int> blank_line_indices;
23 std::set<int> comment_line_indices;
24 std::map<int, int> line_offsets;
25
26 // First, find all of the blank lines and comment lines.
27 int line_num = 0;
28 line_offsets[line_num++] = 0;
29 for (int i = 0; i < output_.size(); i++) {
30 if (output_[i] == '\n' && i + 1 != output_.size()) {
31 line_offsets[line_num] = i + 1;
32 bool is_blank = true;
33 bool is_comment = false;
34 for (int j = i + 1;
35 j < output_.size() && output_[j] != '\n'; j++) {
36 if (!IsNonNewlineWS(output_[j])) {
37 is_blank = false;
38 }
39 if (IsStartOfComment(output_, j)) {
40 is_comment = true;
41 }
42 }
43 if (is_blank) {
44 blank_line_indices.insert(line_num);
45 }
46 if (is_comment) {
47 comment_line_indices.insert(line_num);
48 }
49 line_num++;
50 }
51 }
52
53 int last_line_num = line_num - 1;
54
55 // Next, get rid of any blank line that isn't next to a comment or,
56 // if we are respecting trailing blank lines, right before the end.
57 // Make an exception if the previous line is blank - i.e., coalesce
58 // multiple blank lines. Work backwards so we don't screw up line
59 // numbers.
60 for (auto i = blank_line_indices.rbegin();
61 i != blank_line_indices.rend();
62 ++i) {
63 int line_num = *i;
64 bool is_next_to_comment =
65 comment_line_indices.count(line_num - 1) != 0 ||
66 comment_line_indices.count(line_num + 1) != 0;
67 bool need_to_preserve_because_last =
68 respects_trailing_blankline &&
69 line_num == last_line_num - 1;
70 bool need_to_coalesce =
71 blank_line_indices.count(line_num + 1) != 0;
72 if ((!is_next_to_comment && !need_to_preserve_because_last) ||
73 need_to_coalesce) {
74 int offset = line_offsets[line_num];
75 size_t next_newline = output_.find_first_of("\n", offset);
76 if (next_newline == std::string::npos) {
77 next_newline = output_.size();
78 }
79 output_.erase(offset, next_newline - offset + 1);
80 }
81 }
82 }
83
84 // Assumptions: Leading WS has been stripped.
85 // Rules:
86 // - newlines after ';', '{' (unless before a comment)
87 // - newlines before top-level decls (unless after a comment).
InsertRequiredNewlines(bool is_top_level)88 void FormattingTreeVisitor::Segment::InsertRequiredNewlines(bool is_top_level) {
89 // Insert lines after ';' and '{', if not already present
90 for (int i = 0; i < output_.size(); i++) {
91 MaybeWindPastComment(output_, i);
92 char ch = output_[i];
93 if (ch == ';' || ch == '{') {
94 if (i == output_.size() - 1) {
95 output_.append("\n");
96 } else {
97 size_t j = output_.find_first_not_of(kWsCharactersNoNewline, i + 1);
98 // Unless the next thing is a comment.
99 if (j != std::string::npos && !IsStartOfComment(output_, j)) {
100 // Make the next thing a newline.
101 if (IsNonNewlineWS(output_[i + 1])) {
102 output_[i + 1] = '\n';
103 } else if (output_[i + 1] != '\n') {
104 output_.insert(i + 1, "\n");
105 }
106 }
107 }
108 }
109 }
110
111 // Insert lines before top level decls.
112 if (is_top_level) {
113 // Right before the last word in this string, we need a blank line,
114 // followed by some (possibly zero) number of comment lines. So we
115 // break the string into lines, and then work backwards.
116
117 std::stringstream ss(output_);
118 std::string tmp;
119 std::vector<std::string> lines;
120
121 while (std::getline(ss, tmp, '\n')) {
122 lines.push_back(tmp);
123 }
124 std::string terminal = lines.back();
125 lines.pop_back();
126 if (lines.size() == 1) {
127 lines[0].append("\n");
128 } else {
129 // From the end of the list of lines, find the first line
130 // that isn't a comment, and insert a blank line (if it
131 // isn't already blank).
132 int i = lines.size() - 1;
133 while (i >= 0 && IsStartOfComment(lines[i], 0)) {
134 i--;
135 }
136
137 if (!IsStartOfBlankLine(lines[i], 0)) {
138 lines.insert(lines.begin() + i + 1, "");
139 }
140 }
141 output_ = "";
142 for (auto line : lines) {
143 output_ += line + "\n";
144 }
145 output_ += terminal;
146 }
147 }
148
EraseMultipleSpacesAt(int pos,int leave_this_many,bool incl_newline)149 int FormattingTreeVisitor::Segment::EraseMultipleSpacesAt(int pos, int leave_this_many, bool incl_newline) {
150 std::function<bool(char)> is_ws;
151 if (incl_newline) {
152 is_ws = [](char ch) { return isspace(ch); };
153 } else {
154 is_ws = [](char ch) { return IsNonNewlineWS(ch); };
155 }
156 if (!is_ws(output_[pos])) {
157 return 0;
158 }
159 int length_of_spaces = 0;
160
161 int start_pos = pos;
162 int end_pos = pos;
163 while (start_pos > 0 && is_ws(output_[start_pos - 1])) {
164 start_pos--;
165 }
166
167 // int_size - 2 can be negative, and output_.size() is unsigned,
168 // cast to make the comparison work.
169 int int_size = static_cast<int>(output_.size());
170 while (end_pos <= int_size - 2 &&
171 is_ws(output_[end_pos + 1])) {
172 end_pos++;
173 }
174
175 length_of_spaces = end_pos - start_pos + 1;
176 int num_deleted_spaces =
177 std::max(length_of_spaces - leave_this_many, 0);
178 output_.erase(start_pos, num_deleted_spaces);
179 return num_deleted_spaces;
180 }
181
182 // Assumption: Trailing WS has been stripped, spaces have been changed to ' '
183 // Rules:
184 // - No non-' ' or '\n' whitespace
185 // - One ws token before / after every ws-requiring character
186 // - No non-newline ws before / after characters that don't want it.
187 // - "->" operators are never at the end of the line.
RegularizeSpaces(bool & ws_required_next)188 void FormattingTreeVisitor::Segment::RegularizeSpaces(bool& ws_required_next) {
189 bool last_char_required_ws = false;
190
191 // Check if this is still true from the last node.
192 if (ws_required_next) {
193 output_.insert(0, " ");
194 ws_required_next = false;
195 }
196
197 for (int i = 0; i < output_.size(); i++) {
198 // If it is a comment, jump to EOL.
199 MaybeWindPastComment(output_, i);
200
201 // If we see "->\n", change it to "\n->".
202 const char arrow_nl[] = "->\n";
203 if (output_.compare(i, strlen(arrow_nl), arrow_nl) == 0) {
204 output_.replace(i, strlen(arrow_nl), "\n->");
205 i -= EraseMultipleSpacesAt(i - 1, 0);
206 }
207
208 // Erase multiple spaces
209 EraseMultipleSpacesAt(i);
210
211 // Ensure whitespace around certain characters
212 if (RequiresWSBeforeChar(output_[i])) {
213 if (i == 0 || !isspace(output_[i - 1])) {
214 output_.insert(i, " ");
215 i++;
216 }
217 }
218
219 // This is a little weird. '(' requires ws if it follows an
220 // arrow, but not if it follows a method name. Both of these
221 // are in interface method definitions, so this ends up being
222 // slightly easier than having it positionally defined during
223 // AST traversal.
224 if (output_[i] == '(') {
225 if (!last_char_required_ws && i > 0) {
226 i -= EraseMultipleSpacesAt(i - 1, 0);
227 }
228 }
229
230 // Ensure no whitespace around other characters
231 if (NoSpacesBeforeChar(output_[i])) {
232 if (i > 0) {
233 i -= EraseMultipleSpacesAt(i - 1, 0, NoWSBeforeChar(output_[i]));
234 }
235 }
236
237 // We don't want whitespace after these characters... unless there is a
238 // comment after the WS.
239 int j;
240 for (j = i + 1;
241 j < output_.size() && IsNonNewlineWS(output_[j]);
242 j++)
243 ;
244 if (NoWSAfterChar(output_[i]) &&
245 !IsStartOfComment(output_, j)) {
246 EraseMultipleSpacesAt(i + 1, 0);
247 }
248
249 // The following clause is figuring out whether the next
250 // iteration requires ws, so we need to keep it past anything
251 // that uses that information in the loop.
252 if (RequiresWSAfterChar(output_[i])) {
253 if (i != output_.size() - 1 && !isspace(output_[i + 1])) {
254 output_.insert(i + 1, " ");
255 i++;
256 }
257 last_char_required_ws = true;
258 } else {
259 if (!isspace(output_[i])) {
260 last_char_required_ws = false;
261 }
262 }
263 }
264 ws_required_next = last_char_required_ws;
265 }
266
267 // Rules are mostly obvious, but see TrackMethodInterfaceAlignment below.
268 // Precondition: By now, everything should have had its leading ws
269 // stripped, and } characters are the first things on their own lines.
Indent(int & current_nesting)270 void FormattingTreeVisitor::Segment::Indent(int& current_nesting) {
271 for (int i = 0; i < output_.size(); i++) {
272 if (output_[i] == '\n') {
273 // Don't indent a blank line.
274 if (output_[i + 1] == '\n') {
275 continue;
276 }
277 // If this is an outdent line, do that.
278 if (output_[i + 1] == '}') {
279 current_nesting--;
280 }
281 int indent = current_nesting * kIndentSpaces;
282 if (visitor_->newline_means_indent_more_) {
283 if (visitor_->interface_method_alignment_ && visitor_->interface_method_alignment_size_ > -1) {
284 indent = visitor_->interface_method_alignment_size_;
285 } else {
286 indent += kIndentSpaces;
287 }
288 }
289 output_.insert(i + 1, indent, ' ');
290 }
291
292 int pos = i;
293 // Skip comments at this point, because we don't want to
294 // increase nesting based on a '{' character in a comment. :)
295 MaybeWindPastComment(output_, pos);
296
297 // 1 less than pos because i will be incremented on the next
298 // iteration. But that means it is a real character, so we need
299 // to skip testing that character to see if it changes the
300 // nesting level.
301 if (pos != i) {
302 i = pos - 1;
303 continue;
304 }
305
306 if (output_[i] == '{') {
307 current_nesting++;
308 }
309 if (output_[i] == ')') {
310 visitor_->interface_method_alignment_size_ = visitor_->offset_of_first_id_;
311 }
312 if (output_[i] == ';') {
313 visitor_->interface_method_alignment_size_ = -1;
314 visitor_->interface_method_alignment_ = false;
315 visitor_->newline_means_indent_more_ = false;
316 }
317 }
318 }
319
320 // The purpose of this method is to figure out what the indentation will be if
321 // we encounter a newline. The rule is :
322 // - If there isn't a parameter on the same line after the '(' character, +1
323 // indent past the beginning of the method name.
324 // - If there is a parameter on the same line after the '(' character,
325 // align at the same vertical column as that parameter.
TrackInterfaceMethodAlignment(const std::string & str)326 void FormattingTreeVisitor::TrackInterfaceMethodAlignment(const std::string& str) {
327 static std::locale c_locale("C");
328 if (interface_method_alignment_) {
329 for (int i = 0; i < str.size(); i++) {
330 MaybeWindPastComment(str, i);
331
332 char ch = str[i];
333 if (ch == '\n') {
334 distance_from_last_newline_ = 0;
335 } else {
336 distance_from_last_newline_++;
337 }
338
339 // This figures out if we are supposed to align to the '(' or the
340 // method name.
341 if (ch == '(') {
342 bool align_on_oparen = false;
343 for (int j = i + 1; j < str.size(); j++) {
344 MaybeWindPastComment(str, j);
345 if (str[j] == '\n')
346 break;
347 if (!IsNonNewlineWS(str[j]))
348 align_on_oparen = true;
349 }
350 if (align_on_oparen) {
351 interface_method_alignment_size_ = distance_from_last_newline_;
352 }
353 }
354
355 if (!has_ordinal_) {
356 if (isalpha(ch, c_locale) &&
357 interface_method_alignment_size_ == -1) {
358 // This should be the method identifier.
359 offset_of_first_id_ =
360 interface_method_alignment_size_ =
361 distance_from_last_newline_ + kIndentSpaces - 1;
362 }
363 } else {
364 // TODO(FIDL-372): remove this branch. It is only relevant when
365 // there are explicit ordinals.
366 // This tracks the distance from the beginning of the method
367 // name, in case we need it (i.e., in case we don't indent to
368 // the '(' character.
369 if (!isspace(ch) && next_nonws_char_is_checkpoint_) {
370 offset_of_first_id_ =
371 interface_method_alignment_size_ =
372 distance_from_last_newline_ + kIndentSpaces - 1;
373 next_nonws_char_is_checkpoint_ = false;
374 }
375 if (str[i] == ':' && interface_method_alignment_size_ == -1) {
376 // The first ':' we see - means it is the gap after the
377 // ordinal. The next thing we see is the method name, so
378 // that might become the indentation level.
379 next_nonws_char_is_checkpoint_ = true;
380 }
381 }
382 }
383 }
384 }
385
OnFile(std::unique_ptr<fidl::raw::File> const & element)386 void FormattingTreeVisitor::OnFile(std::unique_ptr<fidl::raw::File> const& element) {
387 // Eat ws at the beginning of the file.
388 fidl::Token real_start = element->start_;
389 fidl::StringView start_view = real_start.previous_end().data();
390 const char* start_ptr = start_view.data();
391 int initial_length = start_view.size();
392 size_t offset = strspn(start_ptr, kWsCharacters);
393 fidl::StringView processed_file_start(
394 start_ptr + offset, initial_length - offset);
395 element->start_.set_previous_end(
396 fidl::SourceLocation(processed_file_start, real_start.previous_end().source_file()));
397
398 fidl::raw::DeclarationOrderTreeVisitor::OnFile(element);
399 }
400
401 } // namespace raw
402 } // namespace fidl
403