1 // Copyright 2018 The Fuchsia Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef ZIRCON_SYSTEM_HOST_BANJO_INCLUDE_BANJO_FORMATTER_H_ 6 #define ZIRCON_SYSTEM_HOST_BANJO_INCLUDE_BANJO_FORMATTER_H_ 7 8 #include <memory> 9 #include <regex> 10 #include <sstream> 11 12 #include "tree_visitor.h" 13 14 // A TreeVisitor (see tree_visitor.h) that pretty-prints BANJO code. 15 16 namespace banjo { 17 namespace raw { 18 19 class ScopedBool { 20 public: 21 ScopedBool(bool& source, bool value = true) prev_value_(source)22 : prev_value_(source), source_(source) { 23 source_ = value; 24 } ~ScopedBool()25 ~ScopedBool() { 26 source_ = prev_value_; 27 } 28 29 private: 30 bool prev_value_; 31 bool& source_; 32 }; 33 34 class ScopedIncrement { 35 public: ScopedIncrement(int & source)36 ScopedIncrement(int& source) 37 : source_(source) { 38 source_++; 39 } ~ScopedIncrement()40 ~ScopedIncrement() { 41 source_--; 42 } 43 44 private: 45 int& source_; 46 }; 47 48 // A Visitor that pretty prints its AST and makes its result available via 49 // formatted_output(). 50 // 51 // Implementation note: The visitor mostly does the same thing on every node, 52 // which is encapsulated in OnSourceElementShared. Where we have overridden a 53 // particular node's visitor, it usually means that slightly different behavior 54 // is needed for that language construct. For example, using and const 55 // declarations respect leading blank lines if they are already there, and 56 // struct, enum, and interface declarations require leading blank lines. 57 class FormattingTreeVisitor : public DeclarationOrderTreeVisitor { 58 public: FormattingTreeVisitor()59 FormattingTreeVisitor() 60 : last_source_location_(nullptr) {} 61 OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const & element)62 virtual void OnInterfaceDeclaration(std::unique_ptr<InterfaceDeclaration> const& element) override { 63 OnBlankLineRequiringNode(); 64 TreeVisitor::OnInterfaceDeclaration(element); 65 } 66 OnSourceElementStart(const SourceElement & element)67 virtual void OnSourceElementStart(const SourceElement& element) override { 68 OnSourceElementShared(element.start_); 69 } 70 OnSourceElementEnd(const SourceElement & element)71 virtual void OnSourceElementEnd(const SourceElement& element) override { 72 OnSourceElementShared(element.end_); 73 } 74 OnAttributeList(std::unique_ptr<AttributeList> const & element)75 virtual void OnAttributeList(std::unique_ptr<AttributeList> const& element) override { 76 // Disabling these in case we're in an interface method and it thinks 77 // the next line needs to be indented more. We don't want an indent 78 // after a newline following an attribute list. It will be reenabled by 79 // the next visited AST node in the method. 80 newline_means_indent_more_ = false; 81 interface_method_alignment_ = false; 82 // This prevents the above from being reenabled during our walk of the 83 // AttributeList 84 ScopedBool suppress(is_member_decl_, false); 85 TreeVisitor::OnAttributeList(element); 86 } 87 OnUsing(std::unique_ptr<Using> const & element)88 virtual void OnUsing(std::unique_ptr<Using> const& element) override { 89 OnBlankLineRespectingNode(); 90 ScopedBool mem(is_member_decl_); 91 TreeVisitor::OnUsing(element); 92 } 93 OnConstDeclaration(std::unique_ptr<ConstDeclaration> const & element)94 virtual void OnConstDeclaration(std::unique_ptr<ConstDeclaration> const& element) override { 95 OnBlankLineRespectingNode(); 96 ScopedBool mem(is_member_decl_); 97 TreeVisitor::OnConstDeclaration(element); 98 } 99 OnEnumMember(std::unique_ptr<EnumMember> const & element)100 virtual void OnEnumMember(std::unique_ptr<EnumMember> const& element) override { 101 OnBlankLineRespectingNode(); 102 ScopedBool mem(is_member_decl_); 103 TreeVisitor::OnEnumMember(element); 104 } 105 OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const & element)106 virtual void OnEnumDeclaration(std::unique_ptr<EnumDeclaration> const& element) override { 107 OnBlankLineRequiringNode(); 108 TreeVisitor::OnEnumDeclaration(element); 109 } 110 OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const & element)111 virtual void OnInterfaceMethod(std::unique_ptr<InterfaceMethod> const& element) override { 112 interface_method_alignment_ = true; 113 interface_method_alignment_size_ = -1; 114 next_nonws_char_is_checkpoint_ = false; 115 OnBlankLineRespectingNode(); 116 ScopedBool before(blank_space_before_colon_, false); 117 ScopedBool mem(is_member_decl_); 118 TreeVisitor::OnInterfaceMethod(element); 119 } 120 OnStructDeclaration(std::unique_ptr<StructDeclaration> const & element)121 virtual void OnStructDeclaration(std::unique_ptr<StructDeclaration> const& element) override { 122 OnBlankLineRequiringNode(); 123 TreeVisitor::OnStructDeclaration(element); 124 } 125 OnStructMember(std::unique_ptr<StructMember> const & element)126 virtual void OnStructMember(std::unique_ptr<StructMember> const& element) override { 127 OnBlankLineRespectingNode(); 128 ScopedBool mem(is_member_decl_); 129 TreeVisitor::OnStructMember(element); 130 } 131 OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const & element)132 virtual void OnUnionDeclaration(std::unique_ptr<UnionDeclaration> const& element) override { 133 OnBlankLineRequiringNode(); 134 TreeVisitor::OnUnionDeclaration(element); 135 } 136 OnUnionMember(std::unique_ptr<UnionMember> const & element)137 virtual void OnUnionMember(std::unique_ptr<UnionMember> const& element) override { 138 ScopedBool mem(is_member_decl_); 139 OnBlankLineRespectingNode(); 140 TreeVisitor::OnUnionMember(element); 141 } 142 OnType(std::unique_ptr<Type> const & element)143 virtual void OnType(std::unique_ptr<Type> const& element) override { 144 ScopedIncrement si(nested_type_depth_); 145 ScopedBool before_colon(blank_space_before_colon_, false); 146 ScopedBool after_colon(blank_space_after_colon_, false); 147 TreeVisitor::OnType(element); 148 } 149 150 virtual void OnFile(std::unique_ptr<File> const& element) override; 151 formatted_output()152 std::string* formatted_output() { return &formatted_output_; } 153 154 private: 155 const char* last_source_location_; 156 157 static constexpr int kIndentSpaces = 4; 158 159 static constexpr char kWsCharacters[] = " \t\n\v\f\r"; 160 161 static constexpr char kWsCharactersNoNewline[] = " \t\v\f\r"; 162 163 std::string formatted_output_; 164 165 // Indentations can be caused by nesting in the AST (e.g., if you've started 166 // a compound statement) or, in some situations, if there is a newline in 167 // the code (e.g., if you've decided to line break before the "->" operator 168 // in the middle of an interface definition). 169 170 // What follows are adjustable properties. We flip them from true to false 171 // at various points in the tree walk depending on what behavior we want. 172 173 // When this is true, you get a blank line and indentation at the end of the 174 // segment. This is true for top level decls that *require* blank lines: 175 // e.g., structs, unions, and interfaces. 176 bool blank_line_requiring_node_ = false; 177 178 // When this is true, you get a blank line and indentation at the end of the 179 // segment if there is already a blank line there. This is true for consts 180 // and using declarations. 181 bool blank_line_respecting_node_ = false; 182 183 // When this is true, and a newline happens, you get an additional 184 // indentation. 185 bool newline_means_indent_more_ = false; 186 187 // This is true in decl headers, but not after the ordinal in an interface 188 // method or in the element count for relevant types. 189 bool blank_space_before_colon_ = true; 190 191 // This is true in decl headers and after the ordinal in an interface 192 // method, but not in the element count for relevant types. 193 bool blank_space_after_colon_ = true; 194 195 // > characters require spaces after them unless you are in the middle of a 196 // parameterized type: arrays, vectors, handles, and requests. 197 int nested_type_depth_ = 0; 198 199 // Interface methods have fancy alignment: if the last open parenthesis was 200 // at EOL, indentation is to the column that had beginning of method name + 201 // kIndentSpaces. Otherwise, it is to the column with the last open 202 // parenthesis + 1. 203 bool interface_method_alignment_ = false; 204 int interface_method_alignment_size_ = -1; 205 int distance_from_last_newline_ = 0; 206 int offset_of_first_id_ = 0; 207 bool next_nonws_char_is_checkpoint_ = false; 208 209 // When we complete a node and know the next thing needs to be whitespace 210 bool ws_required_next_ = false; 211 212 // Some helper methods. IsNonNewlineWS(char ch)213 static bool IsNonNewlineWS(char ch) { 214 return strchr(kWsCharactersNoNewline, ch) != nullptr; 215 } 216 IsStartOfBlankLine(const std::string & str,int offset)217 static bool IsStartOfBlankLine(const std::string& str, int offset) { 218 for (int i = offset; i < str.size() && str[i] != '\n'; i++) { 219 if (!IsNonNewlineWS(str[i])) { 220 return false; 221 } 222 } 223 return true; 224 } 225 IsStartOfComment(std::string str,int i)226 static bool IsStartOfComment(std::string str, int i) { 227 return (i < str.size() - 1) && str[i] == '/' && str[i + 1] == '/'; 228 } 229 230 // If the string str at offset pos is the beginning of a comment, pos is 231 // modified to be the newline character at EOL. MaybeWindPastComment(std::string str,int & pos)232 static void MaybeWindPastComment(std::string str, int& pos) { 233 if (IsStartOfComment(str, pos)) { 234 while (pos < str.size() && str[pos] != '\n') { 235 pos++; 236 } 237 } 238 } 239 240 // A "Segment" is a part of the source that we format - it extends from the 241 // end of the previously formatted AST node to the end of the first token 242 // represented in this AST node. 243 class Segment { 244 public: Segment(std::string input,FormattingTreeVisitor * outer)245 Segment(std::string input, FormattingTreeVisitor* outer) 246 : output_(input), visitor_(outer) {} 247 RemoveTrailingWS()248 void RemoveTrailingWS() { 249 std::string re("["); 250 re += kWsCharactersNoNewline; 251 re += "]+\n"; 252 output_ = std::regex_replace(output_, std::regex(re), "\n"); 253 } 254 255 // Removes all of the whitespace at the beginning of every line. Will 256 // add back indentation later. RemoveLeadingWS()257 void RemoveLeadingWS() { 258 std::string re("\n["); 259 re += kWsCharactersNoNewline; 260 re += "]+"; 261 output_ = std::regex_replace(output_, std::regex(re), "\n"); 262 } 263 264 void RemoveExtraBlankLines(bool respects_trailing_blankline); 265 266 void InsertRequiredNewlines(bool is_top_level); 267 268 // No non-' ' or '\n' whitespace 269 // One ws token before / after every ws-requiring character 270 // No non-newline ws before / after characters that don't want it. 271 void RegularizeSpaces(bool& ws_required_next); 272 273 // Precondition: By now, everything should have had its leading ws 274 // stripped, and } characters are the first things on their own lines. 275 void Indent(int& current_nesting); 276 GetOutput()277 std::string GetOutput() { 278 return output_; 279 } 280 281 private: RequiresWSBeforeChar(char ch)282 bool RequiresWSBeforeChar(char ch) { 283 return (ch == '{') || 284 (ch == '=') || 285 (visitor_->blank_space_before_colon_ && ch == ':'); 286 } 287 NoSpacesBeforeChar(char ch)288 bool NoSpacesBeforeChar(char ch) { 289 return NoWSBeforeChar(ch) || 290 (ch == ')') || 291 (ch == '?') || 292 (!visitor_->blank_space_before_colon_ && ch == ':') || 293 (visitor_->nested_type_depth_ > 0 && ch == '>'); 294 } 295 NoWSBeforeChar(char ch)296 bool NoWSBeforeChar(char ch) { 297 return (ch == ';'); 298 } 299 RequiresWSAfterChar(char ch)300 bool RequiresWSAfterChar(char ch) { 301 return (ch == '=') || 302 (ch == ',') || 303 (ch == '>' && (visitor_->nested_type_depth_ <= 1)) || 304 (ch == ':' && visitor_->blank_space_after_colon_); 305 } 306 NoWSAfterChar(char ch)307 bool NoWSAfterChar(char ch) { 308 return (ch == ':' && !visitor_->blank_space_after_colon_) || (ch == '('); 309 } 310 311 // As specified on the tin: erases multiple spaces from output_ at 312 // offset pos. Optional |leave_this_many| parameter indicates number of 313 // spaces to leave; default is 1. Optional |incl_newline| param 314 // indicates to erase newline characters as well as other WS. 315 // It returns the number of spaces that were deleted. 316 int EraseMultipleSpacesAt(int pos, int leave_this_many = 1, bool incl_newline = false); 317 318 std::string output_; 319 FormattingTreeVisitor* visitor_; 320 }; 321 322 int current_nesting_ = 0; 323 FormatAndPrintSegment(const std::string & segment)324 std::string FormatAndPrintSegment(const std::string& segment) { 325 Segment seg(segment, this); 326 seg.RemoveTrailingWS(); 327 seg.RemoveLeadingWS(); 328 seg.RemoveExtraBlankLines(blank_line_respecting_node_); 329 seg.RegularizeSpaces(ws_required_next_); 330 seg.InsertRequiredNewlines(blank_line_requiring_node_); 331 seg.Indent(current_nesting_); 332 333 std::string output = seg.GetOutput(); 334 335 // We've respected prior blank lines for this decl (if it is a decl), 336 // and we should stop now. 337 blank_line_requiring_node_ = false; 338 blank_line_respecting_node_ = false; 339 340 // If this was the start of a member decl, it was indented by 341 // kIndentSpaces. Any other newlines inside the member decl should be 342 // indented additionally. 343 if (is_member_decl_) { 344 interface_method_alignment_ = true; 345 newline_means_indent_more_ = true; 346 } 347 348 return output; 349 } 350 OnBlankLineRequiringNode()351 void OnBlankLineRequiringNode() { 352 blank_line_requiring_node_ = true; 353 blank_line_respecting_node_ = true; 354 } 355 OnBlankLineRespectingNode()356 void OnBlankLineRespectingNode() { 357 blank_line_respecting_node_ = true; 358 } 359 360 bool is_member_decl_ = false; 361 362 // str is a gap plus the next meaningful token. 363 void TrackInterfaceMethodAlignment(const std::string& str); 364 OnSourceElementShared(const banjo::Token & current_token)365 void OnSourceElementShared(const banjo::Token& current_token) { 366 const char* ws_location = current_token.previous_end().data().data(); 367 // Printed code must increase in monotonic order, for two reasons. 368 // First of all, we don't reorder anything. Second of all, the start 369 // token for an identifier list (for example) is the same as the start 370 // token for the first identifier, so we need to make sure we don't 371 // print that token twice. 372 if (ws_location > last_source_location_) { 373 int size = (int)(current_token.data().data() - current_token.previous_end().data().data()); 374 std::string gap(ws_location, size); 375 std::string content(current_token.data().data(), current_token.data().size()); 376 std::string total_string = FormatAndPrintSegment(gap + content); 377 TrackInterfaceMethodAlignment(total_string); 378 formatted_output_ += total_string; 379 last_source_location_ = ws_location; 380 } 381 } 382 }; 383 384 } // namespace raw 385 } // namespace banjo 386 387 #endif // ZIRCON_SYSTEM_HOST_BANJO_INCLUDE_BANJO_FORMATTER_H_ 388