1 // Copyright 2018 The Fuchsia Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_ 6 #define ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_ 7 8 #include <memory> 9 #include <regex> 10 #include <sstream> 11 12 #include "tree_visitor.h" 13 14 // A TreeVisitor (see tree_visitor.h) that pretty-prints FIDL code. 15 16 namespace fidl { 17 namespace raw { 18 19 class ScopedBool { 20 public: 21 ScopedBool(bool& source, bool value = true) prev_value_(source)22 : prev_value_(source), source_(source) { 23 source_ = value; 24 } ~ScopedBool()25 ~ScopedBool() { 26 source_ = prev_value_; 27 } 28 29 private: 30 bool prev_value_; 31 bool& source_; 32 }; 33 34 class ScopedIncrement { 35 public: ScopedIncrement(int & source)36 ScopedIncrement(int& source) 37 : source_(source) { 38 source_++; 39 } ~ScopedIncrement()40 ~ScopedIncrement() { 41 source_--; 42 } 43 44 private: 45 int& source_; 46 }; 47 48 // A Visitor that pretty prints its AST and makes its result available via 49 // formatted_output(). 50 // 51 // Implementation note: The visitor mostly does the same thing on every node, 52 // which is encapsulated in OnSourceElementShared. Where we have overridden a 53 // particular node's visitor, it usually means that slightly different behavior 54 // is needed for that language construct. For example, using and const 55 // declarations respect leading blank lines if they are already there, and 56 // struct, enum, and interface declarations require leading blank lines. 57 class FormattingTreeVisitor : public DeclarationOrderTreeVisitor { 58 public: FormattingTreeVisitor()59 FormattingTreeVisitor() 60 : last_source_location_(nullptr) {} 61 OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const & element)62 virtual void OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const& element) override { 63 OnBlankLineRequiringNode(); 64 TreeVisitor::OnInterfaceDeclaration(element); 65 } 66 OnSourceElementStart(const SourceElement & element)67 virtual void OnSourceElementStart(const SourceElement& element) override { 68 OnSourceElementShared(element.start_); 69 } 70 OnSourceElementEnd(const SourceElement & element)71 virtual void OnSourceElementEnd(const SourceElement& element) override { 72 OnSourceElementShared(element.end_); 73 } 74 OnAttributeList(std::unique_ptr<AttributeList> const & element)75 virtual void OnAttributeList(std::unique_ptr<AttributeList> const& element) override { 76 // Disabling these in case we're in an interface method and it thinks 77 // the next line needs to be indented more. We don't want an indent 78 // after a newline following an attribute list. It will be reenabled by 79 // the next visited AST node in the method. 80 newline_means_indent_more_ = false; 81 interface_method_alignment_ = false; 82 // This prevents the above from being reenabled during our walk of the 83 // AttributeList 84 ScopedBool suppress(is_member_decl_, false); 85 TreeVisitor::OnAttributeList(element); 86 } 87 OnUsing(std::unique_ptr<Using> const & element)88 virtual void OnUsing(std::unique_ptr<Using> const& element) override { 89 OnBlankLineRespectingNode(); 90 ScopedBool mem(is_member_decl_); 91 TreeVisitor::OnUsing(element); 92 } 93 OnConstDeclaration(std::unique_ptr<ConstDeclaration> const & element)94 virtual void OnConstDeclaration(std::unique_ptr<ConstDeclaration> const& element) override { 95 OnBlankLineRespectingNode(); 96 ScopedBool mem(is_member_decl_); 97 TreeVisitor::OnConstDeclaration(element); 98 } 99 OnEnumMember(std::unique_ptr<EnumMember> const & element)100 virtual void OnEnumMember(std::unique_ptr<EnumMember> const& element) override { 101 OnBlankLineRespectingNode(); 102 ScopedBool mem(is_member_decl_); 103 TreeVisitor::OnEnumMember(element); 104 } 105 OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const & element)106 virtual void OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const& element) override { 107 OnBlankLineRequiringNode(); 108 TreeVisitor::OnEnumDeclaration(element); 109 } 110 OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const & element)111 virtual void OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const& element) override { 112 interface_method_alignment_ = true; 113 interface_method_alignment_size_ = -1; 114 next_nonws_char_is_checkpoint_ = false; 115 OnBlankLineRespectingNode(); 116 ScopedBool before(blank_space_before_colon_, false); 117 ScopedBool mem(is_member_decl_); 118 ScopedBool has_ordinal(has_ordinal_, element->ordinal != nullptr); 119 TreeVisitor::OnInterfaceMethod(element); 120 } 121 OnStructDeclaration(std::unique_ptr<StructDeclaration> const & element)122 virtual void OnStructDeclaration(std::unique_ptr<StructDeclaration> const& element) override { 123 OnBlankLineRequiringNode(); 124 TreeVisitor::OnStructDeclaration(element); 125 } 126 OnStructMember(std::unique_ptr<StructMember> const & element)127 virtual void OnStructMember(std::unique_ptr<StructMember> const& element) override { 128 OnBlankLineRespectingNode(); 129 ScopedBool mem(is_member_decl_); 130 TreeVisitor::OnStructMember(element); 131 } 132 OnTableDeclaration(std::unique_ptr<TableDeclaration> const & element)133 virtual void OnTableDeclaration(std::unique_ptr<TableDeclaration> const& element) override { 134 OnBlankLineRequiringNode(); 135 TreeVisitor::OnTableDeclaration(element); 136 } 137 OnTableMember(std::unique_ptr<TableMember> const & element)138 virtual void OnTableMember(std::unique_ptr<TableMember> const& element) override { 139 OnBlankLineRespectingNode(); 140 ScopedBool mem(is_member_decl_); 141 ScopedBool before_colon(blank_space_before_colon_, false); 142 ScopedBool after_colon(blank_space_after_colon_, true); 143 TreeVisitor::OnTableMember(element); 144 } 145 OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const & element)146 virtual void OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const& element) override { 147 OnBlankLineRequiringNode(); 148 TreeVisitor::OnUnionDeclaration(element); 149 } 150 OnUnionMember(std::unique_ptr<UnionMember> const & element)151 virtual void OnUnionMember(std::unique_ptr<UnionMember> const& element) override { 152 ScopedBool mem(is_member_decl_); 153 OnBlankLineRespectingNode(); 154 TreeVisitor::OnUnionMember(element); 155 } 156 OnType(std::unique_ptr<Type> const & element)157 virtual void OnType(std::unique_ptr<Type> const& element) override { 158 ScopedIncrement si(nested_type_depth_); 159 ScopedBool before_colon(blank_space_before_colon_, false); 160 ScopedBool after_colon(blank_space_after_colon_, false); 161 TreeVisitor::OnType(element); 162 } 163 164 virtual void OnFile(std::unique_ptr<File> const& element) override; 165 formatted_output()166 std::string* formatted_output() { return &formatted_output_; } 167 168 private: 169 const char* last_source_location_; 170 171 static constexpr int kIndentSpaces = 4; 172 173 static constexpr char kWsCharacters[] = " \t\n\v\f\r"; 174 175 static constexpr char kWsCharactersNoNewline[] = " \t\v\f\r"; 176 177 std::string formatted_output_; 178 179 // Indentations can be caused by nesting in the AST (e.g., if you've started 180 // a compound statement) or, in some situations, if there is a newline in 181 // the code (e.g., if you've decided to line break before the "->" operator 182 // in the middle of an interface definition). 183 184 // What follows are adjustable properties. We flip them from true to false 185 // at various points in the tree walk depending on what behavior we want. 186 187 // When this is true, you get a blank line and indentation at the end of the 188 // segment. This is true for top level decls that *require* blank lines: 189 // e.g., structs, unions, and interfaces. 190 bool blank_line_requiring_node_ = false; 191 192 // When this is true, you get a blank line and indentation at the end of the 193 // segment if there is already a blank line there. This is true for consts 194 // and using declarations. 195 bool blank_line_respecting_node_ = false; 196 197 // When this is true, and a newline happens, you get an additional 198 // indentation. 199 bool newline_means_indent_more_ = false; 200 201 // This is true in decl headers, but not after the ordinal in an interface 202 // method or in the element count for relevant types. 203 bool blank_space_before_colon_ = true; 204 205 // This is true in decl headers and after the ordinal in an interface 206 // method, but not in the element count for relevant types. 207 bool blank_space_after_colon_ = true; 208 209 // > characters require spaces after them unless you are in the middle of a 210 // parameterized type: arrays, vectors, handles, and requests. 211 int nested_type_depth_ = 0; 212 213 // Interface methods have fancy alignment: if the last open parenthesis was 214 // at EOL, indentation is to the column that had beginning of method name + 215 // kIndentSpaces. Otherwise, it is to the column with the last open 216 // parenthesis + 1. 217 bool interface_method_alignment_ = false; 218 int interface_method_alignment_size_ = -1; 219 int distance_from_last_newline_ = 0; 220 int offset_of_first_id_ = 0; 221 bool next_nonws_char_is_checkpoint_ = false; 222 223 // When we complete a node and know the next thing needs to be whitespace 224 bool ws_required_next_ = false; 225 226 // Some helper methods. IsNonNewlineWS(char ch)227 static bool IsNonNewlineWS(char ch) { 228 return strchr(kWsCharactersNoNewline, ch) != nullptr; 229 } 230 IsStartOfBlankLine(const std::string & str,int offset)231 static bool IsStartOfBlankLine(const std::string& str, int offset) { 232 for (int i = offset; i < str.size() && str[i] != '\n'; i++) { 233 if (!IsNonNewlineWS(str[i])) { 234 return false; 235 } 236 } 237 return true; 238 } 239 IsStartOfComment(std::string str,int i)240 static bool IsStartOfComment(std::string str, int i) { 241 return (i < str.size() - 1) && str[i] == '/' && str[i + 1] == '/'; 242 } 243 244 // If the string str at offset pos is the beginning of a comment, pos is 245 // modified to be the newline character at EOL. MaybeWindPastComment(std::string str,int & pos)246 static void MaybeWindPastComment(std::string str, int& pos) { 247 if (IsStartOfComment(str, pos)) { 248 while (pos < str.size() && str[pos] != '\n') { 249 pos++; 250 } 251 } 252 } 253 254 // A "Segment" is a part of the source that we format - it extends from the 255 // end of the previously formatted AST node to the end of the first token 256 // represented in this AST node. 257 class Segment { 258 public: Segment(std::string input,FormattingTreeVisitor * outer)259 Segment(std::string input, FormattingTreeVisitor* outer) 260 : output_(input), visitor_(outer) {} 261 RemoveTrailingWS()262 void RemoveTrailingWS() { 263 std::string re("["); 264 re += kWsCharactersNoNewline; 265 re += "]+\n"; 266 output_ = std::regex_replace(output_, std::regex(re), "\n"); 267 } 268 269 // Removes all of the whitespace at the beginning of every line. Will 270 // add back indentation later. RemoveLeadingWS()271 void RemoveLeadingWS() { 272 std::string re("\n["); 273 re += kWsCharactersNoNewline; 274 re += "]+"; 275 output_ = std::regex_replace(output_, std::regex(re), "\n"); 276 } 277 278 void RemoveExtraBlankLines(bool respects_trailing_blankline); 279 280 void InsertRequiredNewlines(bool is_top_level); 281 282 // No non-' ' or '\n' whitespace 283 // One ws token before / after every ws-requiring character 284 // No non-newline ws before / after characters that don't want it. 285 void RegularizeSpaces(bool& ws_required_next); 286 287 // Precondition: By now, everything should have had its leading ws 288 // stripped, and } characters are the first things on their own lines. 289 void Indent(int& current_nesting); 290 GetOutput()291 std::string GetOutput() { 292 return output_; 293 } 294 295 private: RequiresWSBeforeChar(char ch)296 bool RequiresWSBeforeChar(char ch) { 297 return (ch == '{') || 298 (ch == '=') || 299 (visitor_->blank_space_before_colon_ && ch == ':'); 300 } 301 NoSpacesBeforeChar(char ch)302 bool NoSpacesBeforeChar(char ch) { 303 return NoWSBeforeChar(ch) || 304 (ch == ')') || 305 (ch == '?') || 306 (!visitor_->blank_space_before_colon_ && ch == ':') || 307 (visitor_->nested_type_depth_ > 0 && ch == '>'); 308 } 309 NoWSBeforeChar(char ch)310 bool NoWSBeforeChar(char ch) { 311 return (ch == ';'); 312 } 313 RequiresWSAfterChar(char ch)314 bool RequiresWSAfterChar(char ch) { 315 return (ch == '=') || 316 (ch == ',') || 317 (ch == '>' && (visitor_->nested_type_depth_ <= 1)) || 318 (ch == ':' && visitor_->blank_space_after_colon_); 319 } 320 NoWSAfterChar(char ch)321 bool NoWSAfterChar(char ch) { 322 return (ch == ':' && !visitor_->blank_space_after_colon_) || (ch == '('); 323 } 324 325 // As specified on the tin: erases multiple spaces from output_ at 326 // offset pos. Optional |leave_this_many| parameter indicates number of 327 // spaces to leave; default is 1. Optional |incl_newline| param 328 // indicates to erase newline characters as well as other WS. 329 // It returns the number of spaces that were deleted. 330 int EraseMultipleSpacesAt(int pos, int leave_this_many = 1, bool incl_newline = false); 331 332 std::string output_; 333 FormattingTreeVisitor* visitor_; 334 }; 335 336 int current_nesting_ = 0; 337 FormatAndPrintSegment(const std::string & segment)338 std::string FormatAndPrintSegment(const std::string& segment) { 339 Segment seg(segment, this); 340 seg.RemoveTrailingWS(); 341 seg.RemoveLeadingWS(); 342 seg.RemoveExtraBlankLines(blank_line_respecting_node_); 343 seg.RegularizeSpaces(ws_required_next_); 344 seg.InsertRequiredNewlines(blank_line_requiring_node_); 345 seg.Indent(current_nesting_); 346 347 std::string output = seg.GetOutput(); 348 349 // We've respected prior blank lines for this decl (if it is a decl), 350 // and we should stop now. 351 blank_line_requiring_node_ = false; 352 blank_line_respecting_node_ = false; 353 354 // If this was the start of a member decl, it was indented by 355 // kIndentSpaces. Any other newlines inside the member decl should be 356 // indented additionally. 357 if (is_member_decl_) { 358 interface_method_alignment_ = true; 359 newline_means_indent_more_ = true; 360 } 361 362 return output; 363 } 364 OnBlankLineRequiringNode()365 void OnBlankLineRequiringNode() { 366 blank_line_requiring_node_ = true; 367 blank_line_respecting_node_ = true; 368 } 369 OnBlankLineRespectingNode()370 void OnBlankLineRespectingNode() { 371 blank_line_respecting_node_ = true; 372 } 373 374 bool is_member_decl_ = false; 375 376 // Does the current member have an explicit ordinal? 377 bool has_ordinal_ = false; 378 379 // str is a gap plus the next meaningful token. 380 void TrackInterfaceMethodAlignment(const std::string& str); 381 OnSourceElementShared(const fidl::Token & current_token)382 void OnSourceElementShared(const fidl::Token& current_token) { 383 const char* ws_location = current_token.previous_end().data().data(); 384 // Printed code must increase in monotonic order, for two reasons. 385 // First of all, we don't reorder anything. Second of all, the start 386 // token for an identifier list (for example) is the same as the start 387 // token for the first identifier, so we need to make sure we don't 388 // print that token twice. 389 if (ws_location > last_source_location_) { 390 int size = (int)(current_token.data().data() - current_token.previous_end().data().data()); 391 std::string gap(ws_location, size); 392 std::string content(current_token.data().data(), current_token.data().size()); 393 std::string total_string = FormatAndPrintSegment(gap + content); 394 TrackInterfaceMethodAlignment(total_string); 395 formatted_output_ += total_string; 396 last_source_location_ = ws_location; 397 } 398 } 399 }; 400 401 class OrdinalRemovalVisitor : public fidl::raw::DeclarationOrderTreeVisitor { 402 public: OrdinalRemovalVisitor()403 OrdinalRemovalVisitor() {} OnInterfaceMethod(std::unique_ptr<fidl::raw::InterfaceMethod> const & element)404 virtual void OnInterfaceMethod( 405 std::unique_ptr<fidl::raw::InterfaceMethod> const& element) override { 406 if (element->ordinal != nullptr) { 407 const char* start = element->ordinal->start_.data().data(); 408 const char* end = element->ordinal->end_.data().data() + 409 element->ordinal->end_.data().size(); 410 for (char* ptr = const_cast<char*>(start); ptr < end; ptr++) { 411 // Don't erase comments in the middle of ordinals; 412 if (strncmp("//", ptr, 2) == 0) { 413 while (*ptr != '\n' && ptr < end) { 414 ptr++; 415 } 416 continue; 417 } 418 *ptr = ' '; 419 } 420 421 // Incorporate the whitespace that came before the ordinal into the 422 // whitespace before the identifier. 423 element->identifier->start_.set_previous_end( 424 element->ordinal->start_.previous_end()); 425 element->identifier->end_.set_previous_end( 426 element->ordinal->start_.previous_end()); 427 428 // If there are no attributes, the identifier is now the start of 429 // the method. 430 if (element->attributes == nullptr) 431 element->start_ = element->identifier->start_; 432 433 element->ordinal.reset(nullptr); 434 } 435 DeclarationOrderTreeVisitor::OnInterfaceMethod(element); 436 } 437 }; 438 439 } // namespace raw 440 } // namespace fidl 441 442 #endif // ZIRCON_SYSTEM_HOST_FIDL_INCLUDE_FIDL_FORMATTER_H_ 443