| 1 | // Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file |
| 2 | // for details. All rights reserved. Use of this source code is governed by a |
| 3 | // BSD-style license that can be found in the LICENSE file. |
| 4 | |
| 5 | #ifndef RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ |
| 6 | #define RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ |
| 7 | |
| 8 | #if defined(DART_PRECOMPILED_RUNTIME) |
| 9 | #error "AOT runtime should not use compiler sources (including header files)" |
| 10 | #endif // defined(DART_PRECOMPILED_RUNTIME) |
| 11 | |
| 12 | #include "platform/text_buffer.h" |
| 13 | |
| 14 | #include "vm/allocation.h" |
| 15 | #include "vm/growable_array.h" |
| 16 | #include "vm/hash_map.h" |
| 17 | #include "vm/zone.h" |
| 18 | |
| 19 | namespace dart { |
| 20 | |
// X-macro listing every concrete atom kind. Each entry expands to
// M(name, value_type), where SExp<name> is the atom class and value_type is
// the C++ type of the value that class wraps.
#define FOR_EACH_S_EXPRESSION_ATOM(M)                                          \
  M(Bool, bool)                                                                \
  M(Double, double)                                                            \
  M(Integer, int64_t)                                                          \
  M(String, const char*)                                                       \
  M(Symbol, const char*)

// X-macro listing every concrete S-expression kind: all atoms plus lists.
// Lists do not wrap a single value, so their value_type slot is a placeholder.
#define FOR_EACH_S_EXPRESSION(M)                                               \
  FOR_EACH_S_EXPRESSION_ATOM(M)                                                \
  M(List, _)

// X-macro listing the abstract (non-leaf) S-expression kinds.
#define FOR_EACH_ABSTRACT_S_EXPRESSION(M) M(Atom, _)

// Forward-declare SExp<name> for every kind listed above so the classes can
// refer to each other before their definitions.
#define FORWARD_DECLARATION(name, value_type) class SExp##name;
FOR_EACH_S_EXPRESSION(FORWARD_DECLARATION)
FOR_EACH_ABSTRACT_S_EXPRESSION(FORWARD_DECLARATION)
#undef FORWARD_DECLARATION
| 38 | |
| 39 | // Abstract base class for S-expressions used as an intermediate form for the |
| 40 | // IL serializer. These aren't true (LISP-like) S-expressions, as the atoms |
| 41 | // are more restricted and the lists have extra information. Here is an |
| 42 | // illustrative BNF-style grammar of the current serialized form of |
| 43 | // S-expressions that includes non-whitespace literal tokens: |
| 44 | // |
| 45 | // <s-exp> ::= <atom> | <list> |
| 46 | // <atom> ::= <bool> | <integer> | <string> | <symbol> |
| 47 | // <list> ::= '(' <s-exp>* <extra-info>? ')' |
| 48 | // <extra-info> ::= '{' <extra-elem>* '}' |
| 49 | // <extra-elem> ::= <symbol> <s-exp> ',' |
| 50 | // |
| 51 | // Here, <string>s are double-quoted strings with backslash escaping and |
| 52 | // <symbol>s are sequences of consecutive non-whitespace characters that do not |
| 53 | // include commas (,), parentheses (()), curly braces ({}), or the double-quote |
| 54 | // character ("). |
| 55 | // |
| 56 | // In addition, the <extra-info> is considered a map from symbol labels to |
| 57 | // S-expression values, and as such each symbol used as a key in an <extra-info> |
| 58 | // block should only appear once as a key within that block. |
| 59 | class SExpression : public ZoneAllocated { |
| 60 | public: |
| 61 | explicit SExpression(intptr_t start = kInvalidPos) : start_(start) {} |
| 62 | virtual ~SExpression() {} |
| 63 | |
| 64 | static intptr_t const kInvalidPos = -1; |
| 65 | |
| 66 | static SExpression* FromCString(Zone* zone, const char* cstr); |
| 67 | const char* ToCString(Zone* zone) const; |
| 68 | intptr_t start() const { return start_; } |
| 69 | |
| 70 | #define S_EXPRESSION_TYPE_CHECK(name, value_type) \ |
| 71 | bool Is##name() const { return (As##name() != nullptr); } \ |
| 72 | SExp##name* As##name() { \ |
| 73 | auto const const_this = const_cast<const SExpression*>(this); \ |
| 74 | return const_cast<SExp##name*>(const_this->As##name()); \ |
| 75 | } \ |
| 76 | virtual const SExp##name* As##name() const { return nullptr; } |
| 77 | |
| 78 | FOR_EACH_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK) |
| 79 | FOR_EACH_ABSTRACT_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK) |
| 80 | |
| 81 | virtual const char* DebugName() const = 0; |
| 82 | virtual bool Equals(SExpression* sexp) const = 0; |
| 83 | virtual void SerializeTo(Zone* zone, |
| 84 | BaseTextBuffer* buffer, |
| 85 | const char* indent, |
| 86 | intptr_t width = 80) const = 0; |
| 87 | virtual void SerializeToLine(BaseTextBuffer* buffer) const = 0; |
| 88 | |
| 89 | private: |
| 90 | // Starting character position of the s-expression in the original |
| 91 | // serialization, if it was deserialized. |
| 92 | intptr_t const start_; |
| 93 | DISALLOW_COPY_AND_ASSIGN(SExpression); |
| 94 | }; |
| 95 | |
| 96 | class SExpAtom : public SExpression { |
| 97 | public: |
| 98 | explicit SExpAtom(intptr_t start = kInvalidPos) : SExpression(start) {} |
| 99 | |
| 100 | virtual const SExpAtom* AsAtom() const { return this; } |
| 101 | // No atoms have sub-elements, so they always print to a single line. |
| 102 | virtual void SerializeTo(Zone* zone, |
| 103 | BaseTextBuffer* buffer, |
| 104 | const char* indent, |
| 105 | intptr_t width = 80) const { |
| 106 | SerializeToLine(buffer); |
| 107 | } |
| 108 | |
| 109 | private: |
| 110 | DISALLOW_COPY_AND_ASSIGN(SExpAtom); |
| 111 | }; |
| 112 | |
// Expands, inside the body of class SExp<name>, to the two members that
// identify the concrete kind: As<name>() returning `this`, and DebugName()
// returning the kind's name as a string literal.
#define DEFINE_S_EXPRESSION_TYPE_CHECK(name)                                   \
  virtual const SExp##name* As##name() const { return this; }                  \
  virtual const char* DebugName() const { return #name; }
| 116 | |
| 117 | // The various concrete S-expression atom classes are thin wrappers around |
| 118 | // their contained value that includes serialization and type check methods. |
| 119 | #define DEFINE_S_EXPRESSION_ATOM_CLASS(name, value_type) \ |
| 120 | class SExp##name : public SExpAtom { \ |
| 121 | public: \ |
| 122 | explicit SExp##name(value_type val, intptr_t start = kInvalidPos) \ |
| 123 | : SExpAtom(start), val_(val) {} \ |
| 124 | value_type value() const { return val_; } \ |
| 125 | virtual bool Equals(SExpression* sexp) const; \ |
| 126 | bool Equals(value_type val) const; \ |
| 127 | virtual void SerializeToLine(BaseTextBuffer* buffer) const; \ |
| 128 | DEFINE_S_EXPRESSION_TYPE_CHECK(name) \ |
| 129 | private: \ |
| 130 | value_type const val_; \ |
| 131 | DISALLOW_COPY_AND_ASSIGN(SExp##name); \ |
| 132 | }; |
| 133 | |
| 134 | FOR_EACH_S_EXPRESSION_ATOM(DEFINE_S_EXPRESSION_ATOM_CLASS) |
| 135 | |
| 136 | // A list of S-expressions. Unlike normal S-expressions, an S-expression list |
| 137 | // also contains a hash map kept separate from the elements, which we use for |
| 138 | // extra non-argument information for IL instructions. |
| 139 | class SExpList : public SExpression { |
| 140 | public: |
| 141 | explicit SExpList(Zone* zone, intptr_t start = kInvalidPos) |
| 142 | : SExpression(start), contents_(zone, 2), extra_info_(zone) {} |
| 143 | |
| 144 | using = CStringMap<SExpression*>; |
| 145 | |
| 146 | void Add(SExpression* sexp); |
| 147 | void (const char* label, SExpression* value); |
| 148 | |
| 149 | SExpression* At(intptr_t i) const { return contents_.At(i); } |
| 150 | intptr_t Length() const { return contents_.length(); } |
| 151 | |
| 152 | intptr_t () const { return extra_info_.Length(); } |
| 153 | ExtraInfoHashMap::Iterator () const { |
| 154 | return extra_info_.GetIterator(); |
| 155 | } |
| 156 | bool (const char* cstr) const { return extra_info_.HasKey(cstr); } |
| 157 | SExpression* (const char* cstr) const { |
| 158 | return extra_info_.LookupValue(cstr); |
| 159 | } |
| 160 | |
| 161 | // Shortcut for retrieving the tag from a tagged list (list that contains an |
| 162 | // initial symbol). Returns nullptr if the list is not a tagged list. |
| 163 | SExpSymbol* Tag() const { |
| 164 | if (Length() == 0 || !At(0)->IsSymbol()) return nullptr; |
| 165 | return At(0)->AsSymbol(); |
| 166 | } |
| 167 | |
| 168 | DEFINE_S_EXPRESSION_TYPE_CHECK(List) |
| 169 | virtual bool Equals(SExpression* sexp) const; |
| 170 | virtual void SerializeTo(Zone* zone, |
| 171 | BaseTextBuffer* buffer, |
| 172 | const char* indent, |
| 173 | intptr_t width = 80) const; |
| 174 | virtual void SerializeToLine(BaseTextBuffer* buffer) const; |
| 175 | |
| 176 | private: |
| 177 | static const char* const kElemIndent; |
| 178 | static const char* const ; |
| 179 | |
| 180 | void (Zone* zone, |
| 181 | BaseTextBuffer* buffer, |
| 182 | const char* indent, |
| 183 | int width) const; |
| 184 | void (BaseTextBuffer* buffer) const; |
| 185 | |
| 186 | ZoneGrowableArray<SExpression*> contents_; |
| 187 | ExtraInfoHashMap ; |
| 188 | |
| 189 | DISALLOW_COPY_AND_ASSIGN(SExpList); |
| 190 | }; |
| 191 | |
| 192 | class SExpParser : public ValueObject { |
| 193 | public: |
| 194 | SExpParser(Zone* zone, const char* cstr) |
| 195 | : SExpParser(zone, cstr, strlen(cstr)) {} |
| 196 | SExpParser(Zone* zone, const char* cstr, intptr_t len) |
| 197 | : zone_(zone), |
| 198 | buffer_(ASSERT_NOTNULL(cstr)), |
| 199 | buffer_size_(strnlen(cstr, len)), |
| 200 | cur_label_(nullptr), |
| 201 | cur_value_(nullptr), |
| 202 | list_stack_(zone, 2), |
| 203 | in_extra_stack_(zone, 2), |
| 204 | extra_start_stack_(zone, 2), |
| 205 | cur_label_stack_(zone, 2), |
| 206 | error_message_(nullptr) {} |
| 207 | |
| 208 | // Constants used in serializing and deserializing S-expressions. |
| 209 | static const char* const kBoolTrueSymbol; |
| 210 | static const char* const kBoolFalseSymbol; |
| 211 | static char const kDoubleExponentChar; |
| 212 | static const char* const kDoubleInfinitySymbol; |
| 213 | static const char* const kDoubleNaNSymbol; |
| 214 | |
| 215 | struct ErrorStrings : AllStatic { |
| 216 | static const char* const kOpenString; |
| 217 | static const char* const kBadUnicodeEscape; |
| 218 | static const char* const kOpenSExpList; |
| 219 | static const char* const kOpenMap; |
| 220 | static const char* const kNestedMap; |
| 221 | static const char* const kMapOutsideList; |
| 222 | static const char* const kNonSymbolLabel; |
| 223 | static const char* const kNoMapLabel; |
| 224 | static const char* const kRepeatedMapLabel; |
| 225 | static const char* const kNoMapValue; |
| 226 | static const char* const ; |
| 227 | static const char* const kUnexpectedComma; |
| 228 | static const char* const kUnexpectedRightParen; |
| 229 | static const char* const kUnexpectedRightCurly; |
| 230 | }; |
| 231 | |
| 232 | intptr_t error_pos() const { return error_pos_; } |
| 233 | const char* error_message() const { return error_message_; } |
| 234 | |
| 235 | const char* Input() const { return buffer_; } |
| 236 | SExpression* Parse(); |
| 237 | DART_NORETURN void ReportError() const; |
| 238 | |
| 239 | private: |
| 240 | #define S_EXP_TOKEN_LIST(M) \ |
| 241 | M(LeftParen) \ |
| 242 | M(RightParen) \ |
| 243 | M(Comma) \ |
| 244 | M(LeftCurly) \ |
| 245 | M(RightCurly) \ |
| 246 | M(QuotedString) \ |
| 247 | M(Integer) \ |
| 248 | M(Double) \ |
| 249 | M(Boolean) \ |
| 250 | M(Symbol) |
| 251 | |
| 252 | // clang-format off |
| 253 | #define DEFINE_S_EXP_TOKEN_ENUM_LINE(name) k##name, |
| 254 | enum TokenType { |
| 255 | S_EXP_TOKEN_LIST(DEFINE_S_EXP_TOKEN_ENUM_LINE) |
| 256 | kMaxTokens, |
| 257 | }; |
| 258 | #undef DEFINE_S_EXP_TOKEN_ENUM |
| 259 | // clang-format on |
| 260 | |
| 261 | class Token : public ZoneAllocated { |
| 262 | public: |
| 263 | Token(TokenType type, const char* cstr, intptr_t len) |
| 264 | : type_(type), cstr_(cstr), len_(len) {} |
| 265 | |
| 266 | TokenType type() const { return type_; } |
| 267 | intptr_t length() const { return len_; } |
| 268 | const char* cstr() const { return cstr_; } |
| 269 | const char* DebugName() const { return TokenNames[type()]; } |
| 270 | const char* ToCString(Zone* zone); |
| 271 | |
| 272 | private: |
| 273 | static const char* const TokenNames[kMaxTokens]; |
| 274 | |
| 275 | TokenType const type_; |
| 276 | const char* const cstr_; |
| 277 | intptr_t const len_; |
| 278 | }; |
| 279 | |
| 280 | SExpression* TokenToSExpression(Token* token); |
| 281 | Token* GetNextToken(); |
| 282 | void Reset(); |
| 283 | void StoreError(intptr_t pos, const char* format, ...) PRINTF_ATTRIBUTE(3, 4); |
| 284 | |
| 285 | static bool IsSymbolContinue(char c); |
| 286 | |
| 287 | Zone* const zone_; |
| 288 | const char* const buffer_; |
| 289 | intptr_t const buffer_size_; |
| 290 | intptr_t cur_pos_ = 0; |
| 291 | bool = false; |
| 292 | intptr_t = -1; |
| 293 | const char* cur_label_; |
| 294 | SExpression* cur_value_; |
| 295 | ZoneGrowableArray<SExpList*> list_stack_; |
| 296 | ZoneGrowableArray<bool> ; |
| 297 | ZoneGrowableArray<intptr_t> ; |
| 298 | ZoneGrowableArray<const char*> cur_label_stack_; |
| 299 | intptr_t error_pos_ = -1; |
| 300 | const char* error_message_; |
| 301 | }; |
| 302 | |
| 303 | } // namespace dart |
| 304 | |
| 305 | #endif // RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ |
| 306 | |