]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | // Copyright (C) 2020-2023 Free Software Foundation, Inc. |
1841081a AC |
2 | |
3 | // This file is part of GCC. | |
4 | ||
5 | // GCC is free software; you can redistribute it and/or modify it under | |
6 | // the terms of the GNU General Public License as published by the Free | |
7 | // Software Foundation; either version 3, or (at your option) any later | |
8 | // version. | |
9 | ||
10 | // GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | // for more details. | |
14 | ||
15 | // You should have received a copy of the GNU General Public License | |
16 | // along with GCC; see the file COPYING3. If not see | |
17 | // <http://www.gnu.org/licenses/>. | |
18 | ||
19 | #include "rust-macro-expand.h" | |
20 | #include "rust-macro-substitute-ctx.h" | |
21 | #include "rust-ast-full.h" | |
22 | #include "rust-ast-visitor.h" | |
23 | #include "rust-diagnostics.h" | |
24 | #include "rust-parse.h" | |
25 | #include "rust-attribute-visitor.h" | |
26 | ||
27 | namespace Rust { | |
28 | AST::ASTFragment | |
29 | MacroExpander::expand_decl_macro (Location invoc_locus, | |
30 | AST::MacroInvocData &invoc, | |
31 | AST::MacroRulesDefinition &rules_def, | |
32 | bool semicolon) | |
33 | { | |
34 | // ensure that both invocation and rules are in a valid state | |
35 | rust_assert (!invoc.is_marked_for_strip ()); | |
36 | rust_assert (!rules_def.is_marked_for_strip ()); | |
37 | rust_assert (rules_def.get_macro_rules ().size () > 0); | |
38 | ||
39 | /* probably something here about parsing invoc and rules def token trees to | |
40 | * token stream. if not, how would parser handle the captures of exprs and | |
41 | * stuff? on the other hand, token trees may be kind of useful in rules def as | |
42 | * creating a point where recursion can occur (like having | |
43 | * "compare_macro_match" and then it calling itself when it finds delimiters) | |
44 | */ | |
45 | ||
46 | /* find matching rule to invoc token tree, based on macro rule's matcher. if | |
47 | * none exist, error. | |
48 | * - specifically, check each matcher in order. if one fails to match, move | |
49 | * onto next. */ | |
50 | /* TODO: does doing this require parsing expressions and whatever in the | |
51 | * invoc? if so, might as well save the results if referenced using $ or | |
52 | * whatever. If not, do another pass saving them. Except this is probably | |
53 | * useless as different rules could have different starting points for exprs | |
54 | * or whatever. Decision trees could avoid this, but they have their own | |
55 | * issues. */ | |
56 | /* TODO: will need to modify the parser so that it can essentially "catch" | |
57 | * errors - maybe "try_parse_expr" or whatever methods. */ | |
58 | // this technically creates a back-tracking parser - this will be the | |
59 | // implementation style | |
60 | ||
61 | /* then, after results are saved, generate the macro output from the | |
62 | * transcriber token tree. if i understand this correctly, the macro | |
63 | * invocation gets replaced by the transcriber tokens, except with | |
64 | * substitutions made (e.g. for $i variables) */ | |
65 | ||
66 | /* TODO: it is probably better to modify AST::Token to store a pointer to a | |
67 | * Lexer::Token (rather than being converted) - i.e. not so much have | |
68 | * AST::Token as a Token but rather a TokenContainer (as it is another type of | |
69 | * TokenTree). This will prevent re-conversion of Tokens between each type | |
70 | * all the time, while still allowing the heterogenous storage of token trees. | |
71 | */ | |
72 | ||
73 | AST::DelimTokenTree &invoc_token_tree = invoc.get_delim_tok_tree (); | |
74 | ||
75 | // find matching arm | |
76 | AST::MacroRule *matched_rule = nullptr; | |
77 | std::map<std::string, MatchedFragmentContainer> matched_fragments; | |
78 | for (auto &rule : rules_def.get_rules ()) | |
79 | { | |
80 | sub_stack.push (); | |
81 | bool did_match_rule = try_match_rule (rule, invoc_token_tree); | |
82 | matched_fragments = sub_stack.pop (); | |
83 | ||
84 | if (did_match_rule) | |
85 | { | |
86 | // // Debugging | |
87 | // for (auto &kv : matched_fragments) | |
88 | // rust_debug ("[fragment]: %s (%ld - %s)", kv.first.c_str (), | |
89 | // kv.second.get_fragments ().size (), | |
90 | // kv.second.get_kind () | |
91 | // == MatchedFragmentContainer::Kind::Repetition | |
92 | // ? "repetition" | |
93 | // : "metavar"); | |
94 | ||
95 | matched_rule = &rule; | |
96 | break; | |
97 | } | |
98 | } | |
99 | ||
100 | if (matched_rule == nullptr) | |
101 | { | |
102 | RichLocation r (invoc_locus); | |
103 | r.add_range (rules_def.get_locus ()); | |
104 | rust_error_at (r, "Failed to match any rule within macro"); | |
105 | return AST::ASTFragment::create_error (); | |
106 | } | |
107 | ||
108 | return transcribe_rule (*matched_rule, invoc_token_tree, matched_fragments, | |
109 | semicolon, peek_context ()); | |
110 | } | |
111 | ||
112 | void | |
113 | MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon) | |
114 | { | |
115 | if (depth_exceeds_recursion_limit ()) | |
116 | { | |
117 | rust_error_at (invoc.get_locus (), "reached recursion limit"); | |
118 | return; | |
119 | } | |
120 | ||
121 | AST::MacroInvocData &invoc_data = invoc.get_invoc_data (); | |
122 | ||
123 | // ?? | |
124 | // switch on type of macro: | |
125 | // - '!' syntax macro (inner switch) | |
126 | // - procedural macro - "A token-based function-like macro" | |
127 | // - 'macro_rules' (by example/pattern-match) macro? or not? "an | |
128 | // AST-based function-like macro" | |
129 | // - else is unreachable | |
130 | // - attribute syntax macro (inner switch) | |
131 | // - procedural macro attribute syntax - "A token-based attribute | |
132 | // macro" | |
133 | // - legacy macro attribute syntax? - "an AST-based attribute macro" | |
134 | // - non-macro attribute: mark known | |
135 | // - else is unreachable | |
136 | // - derive macro (inner switch) | |
137 | // - derive or legacy derive - "token-based" vs "AST-based" | |
138 | // - else is unreachable | |
139 | // - derive container macro - unreachable | |
140 | ||
141 | // lookup the rules for this macro | |
142 | NodeId resolved_node = UNKNOWN_NODEID; | |
143 | NodeId source_node = UNKNOWN_NODEID; | |
144 | if (has_semicolon) | |
145 | source_node = invoc.get_macro_node_id (); | |
146 | else | |
147 | source_node = invoc.get_pattern_node_id (); | |
148 | auto seg | |
149 | = Resolver::CanonicalPath::new_seg (source_node, | |
150 | invoc_data.get_path ().as_string ()); | |
151 | ||
152 | bool found = resolver->get_macro_scope ().lookup (seg, &resolved_node); | |
153 | if (!found) | |
154 | { | |
155 | rust_error_at (invoc.get_locus (), "unknown macro: [%s]", | |
156 | seg.get ().c_str ()); | |
157 | return; | |
158 | } | |
159 | ||
160 | // lookup the rules | |
161 | AST::MacroRulesDefinition *rules_def = nullptr; | |
162 | bool ok = mappings->lookup_macro_def (resolved_node, &rules_def); | |
163 | rust_assert (ok); | |
164 | ||
165 | auto fragment = AST::ASTFragment::create_error (); | |
e88ce5cc | 166 | invoc_data.set_expander (this); |
1841081a AC |
167 | |
168 | if (rules_def->is_builtin ()) | |
169 | fragment | |
170 | = rules_def->get_builtin_transcriber () (invoc.get_locus (), invoc_data); | |
171 | else | |
172 | fragment = expand_decl_macro (invoc.get_locus (), invoc_data, *rules_def, | |
173 | has_semicolon); | |
174 | ||
175 | set_expanded_fragment (std::move (fragment)); | |
176 | } | |
177 | ||
178 | /* Determines whether any cfg predicate is false and hence item with attributes | |
179 | * should be stripped. Note that attributes must be expanded before calling. */ | |
180 | bool | |
181 | MacroExpander::fails_cfg (const AST::AttrVec &attrs) const | |
182 | { | |
183 | for (const auto &attr : attrs) | |
184 | { | |
185 | if (attr.get_path () == "cfg" && !attr.check_cfg_predicate (session)) | |
186 | return true; | |
187 | } | |
188 | return false; | |
189 | } | |
190 | ||
191 | /* Determines whether any cfg predicate is false and hence item with attributes | |
192 | * should be stripped. Will expand attributes as well. */ | |
193 | bool | |
194 | MacroExpander::fails_cfg_with_expand (AST::AttrVec &attrs) const | |
195 | { | |
196 | // TODO: maybe have something that strips cfg attributes that evaluate true? | |
197 | for (auto &attr : attrs) | |
198 | { | |
199 | if (attr.get_path () == "cfg") | |
200 | { | |
201 | if (!attr.is_parsed_to_meta_item ()) | |
202 | attr.parse_attr_to_meta_item (); | |
203 | ||
204 | // DEBUG | |
205 | if (!attr.is_parsed_to_meta_item ()) | |
206 | rust_debug ("failed to parse attr to meta item, right before " | |
207 | "cfg predicate check"); | |
208 | else | |
209 | rust_debug ("attr has been successfully parsed to meta item, " | |
210 | "right before cfg predicate check"); | |
211 | ||
212 | if (!attr.check_cfg_predicate (session)) | |
213 | { | |
214 | // DEBUG | |
215 | rust_debug ( | |
216 | "cfg predicate failed for attribute: \033[0;31m'%s'\033[0m", | |
217 | attr.as_string ().c_str ()); | |
218 | ||
219 | return true; | |
220 | } | |
221 | else | |
222 | { | |
223 | // DEBUG | |
224 | rust_debug ("cfg predicate succeeded for attribute: " | |
225 | "\033[0;31m'%s'\033[0m", | |
226 | attr.as_string ().c_str ()); | |
227 | } | |
228 | } | |
229 | } | |
230 | return false; | |
231 | } | |
232 | ||
233 | // Expands cfg_attr attributes. | |
234 | void | |
235 | MacroExpander::expand_cfg_attrs (AST::AttrVec &attrs) | |
236 | { | |
237 | for (std::size_t i = 0; i < attrs.size (); i++) | |
238 | { | |
239 | auto &attr = attrs[i]; | |
240 | if (attr.get_path () == "cfg_attr") | |
241 | { | |
242 | if (!attr.is_parsed_to_meta_item ()) | |
243 | attr.parse_attr_to_meta_item (); | |
244 | ||
245 | if (attr.check_cfg_predicate (session)) | |
246 | { | |
247 | // split off cfg_attr | |
248 | AST::AttrVec new_attrs = attr.separate_cfg_attrs (); | |
249 | ||
250 | // remove attr from vector | |
251 | attrs.erase (attrs.begin () + i); | |
252 | ||
253 | // add new attrs to vector | |
254 | attrs.insert (attrs.begin () + i, | |
255 | std::make_move_iterator (new_attrs.begin ()), | |
256 | std::make_move_iterator (new_attrs.end ())); | |
257 | } | |
258 | ||
259 | /* do something - if feature (first token in tree) is in fact enabled, | |
260 | * make tokens listed afterwards into attributes. i.e.: for | |
261 | * [cfg_attr(feature = "wow", wow1, wow2)], if "wow" is true, then add | |
262 | * attributes [wow1] and [wow2] to attribute list. This can also be | |
263 | * recursive, so check for expanded attributes being recursive and | |
264 | * possibly recursively call the expand_attrs? */ | |
265 | } | |
266 | else | |
267 | { | |
268 | i++; | |
269 | } | |
270 | } | |
271 | attrs.shrink_to_fit (); | |
272 | } | |
273 | ||
274 | void | |
275 | MacroExpander::expand_crate () | |
276 | { | |
277 | NodeId scope_node_id = crate.get_node_id (); | |
278 | resolver->get_macro_scope ().push (scope_node_id); | |
279 | ||
280 | /* fill macro/decorator map from init list? not sure where init list comes | |
281 | * from? */ | |
282 | ||
283 | // TODO: does cfg apply for inner attributes? research. | |
284 | // the apparent answer (from playground test) is yes | |
285 | ||
286 | // expand crate cfg_attr attributes | |
287 | expand_cfg_attrs (crate.inner_attrs); | |
288 | ||
289 | if (fails_cfg_with_expand (crate.inner_attrs)) | |
290 | { | |
291 | // basically, delete whole crate | |
292 | crate.strip_crate (); | |
293 | // TODO: maybe create warning here? probably not desired behaviour | |
294 | } | |
295 | // expand module attributes? | |
296 | ||
297 | push_context (ITEM); | |
298 | ||
299 | // expand attributes recursively and strip items if required | |
300 | AttrVisitor attr_visitor (*this); | |
301 | auto &items = crate.items; | |
302 | for (auto it = items.begin (); it != items.end ();) | |
303 | { | |
304 | auto &item = *it; | |
305 | ||
306 | // mark for stripping if required | |
307 | item->accept_vis (attr_visitor); | |
308 | ||
309 | auto fragment = take_expanded_fragment (attr_visitor); | |
310 | if (fragment.should_expand ()) | |
311 | { | |
312 | // Remove the current expanded invocation | |
313 | it = items.erase (it); | |
314 | for (auto &node : fragment.get_nodes ()) | |
315 | { | |
316 | it = items.insert (it, node.take_item ()); | |
317 | it++; | |
318 | } | |
319 | } | |
320 | else if (item->is_marked_for_strip ()) | |
321 | it = items.erase (it); | |
322 | else | |
323 | it++; | |
324 | } | |
325 | ||
326 | pop_context (); | |
327 | ||
328 | // TODO: should recursive attribute and macro expansion be done in the same | |
329 | // transversal? Or in separate ones like currently? | |
330 | ||
331 | // expand module tree recursively | |
332 | ||
333 | // post-process | |
334 | ||
335 | // extract exported macros? | |
336 | } | |
337 | ||
338 | bool | |
339 | MacroExpander::depth_exceeds_recursion_limit () const | |
340 | { | |
341 | return expansion_depth >= cfg.recursion_limit; | |
342 | } | |
343 | ||
344 | bool | |
345 | MacroExpander::try_match_rule (AST::MacroRule &match_rule, | |
346 | AST::DelimTokenTree &invoc_token_tree) | |
347 | { | |
348 | MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); | |
349 | Parser<MacroInvocLexer> parser (lex); | |
350 | ||
351 | AST::MacroMatcher &matcher = match_rule.get_matcher (); | |
352 | ||
353 | expansion_depth++; | |
354 | if (!match_matcher (parser, matcher)) | |
355 | { | |
356 | expansion_depth--; | |
357 | return false; | |
358 | } | |
359 | expansion_depth--; | |
360 | ||
361 | bool used_all_input_tokens = parser.skip_token (END_OF_FILE); | |
362 | return used_all_input_tokens; | |
363 | } | |
364 | ||
365 | bool | |
366 | MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser, | |
367 | AST::MacroMatchFragment &fragment) | |
368 | { | |
369 | switch (fragment.get_frag_spec ().get_kind ()) | |
370 | { | |
371 | case AST::MacroFragSpec::EXPR: | |
372 | parser.parse_expr (); | |
373 | break; | |
374 | ||
375 | case AST::MacroFragSpec::BLOCK: | |
376 | parser.parse_block_expr (); | |
377 | break; | |
378 | ||
379 | case AST::MacroFragSpec::IDENT: | |
380 | parser.parse_identifier_pattern (); | |
381 | break; | |
382 | ||
383 | case AST::MacroFragSpec::LITERAL: | |
384 | parser.parse_literal_expr (); | |
385 | break; | |
386 | ||
387 | case AST::MacroFragSpec::ITEM: | |
388 | parser.parse_item (false); | |
389 | break; | |
390 | ||
391 | case AST::MacroFragSpec::TY: | |
392 | parser.parse_type (); | |
393 | break; | |
394 | ||
395 | case AST::MacroFragSpec::PAT: | |
396 | parser.parse_pattern (); | |
397 | break; | |
398 | ||
399 | case AST::MacroFragSpec::PATH: | |
400 | parser.parse_path_in_expression (); | |
401 | break; | |
402 | ||
403 | case AST::MacroFragSpec::VIS: | |
404 | parser.parse_visibility (); | |
405 | break; | |
406 | ||
407 | case AST::MacroFragSpec::STMT: { | |
408 | auto restrictions = ParseRestrictions (); | |
409 | restrictions.consume_semi = false; | |
410 | parser.parse_stmt (restrictions); | |
411 | break; | |
412 | } | |
413 | ||
414 | case AST::MacroFragSpec::LIFETIME: | |
415 | parser.parse_lifetime_params (); | |
416 | break; | |
417 | ||
418 | // is meta attributes? | |
419 | case AST::MacroFragSpec::META: | |
420 | parser.parse_attribute_body (); | |
421 | break; | |
422 | ||
423 | case AST::MacroFragSpec::TT: | |
424 | parser.parse_token_tree (); | |
425 | break; | |
426 | ||
427 | // i guess we just ignore invalid and just error out | |
428 | case AST::MacroFragSpec::INVALID: | |
429 | return false; | |
430 | } | |
431 | ||
432 | // it matches if the parser did not produce errors trying to parse that type | |
433 | // of item | |
434 | return !parser.has_errors (); | |
435 | } | |
436 | ||
437 | bool | |
438 | MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser, | |
1fed030c | 439 | AST::MacroMatcher &matcher, bool in_repetition) |
1841081a AC |
440 | { |
441 | if (depth_exceeds_recursion_limit ()) | |
442 | { | |
443 | rust_error_at (matcher.get_match_locus (), "reached recursion limit"); | |
444 | return false; | |
445 | } | |
446 | ||
447 | auto delimiter = parser.peek_current_token (); | |
448 | ||
449 | // this is used so we can check that we delimit the stream correctly. | |
450 | switch (delimiter->get_id ()) | |
451 | { | |
452 | case LEFT_PAREN: { | |
453 | if (!parser.skip_token (LEFT_PAREN)) | |
454 | return false; | |
455 | } | |
456 | break; | |
457 | ||
458 | case LEFT_SQUARE: { | |
459 | if (!parser.skip_token (LEFT_SQUARE)) | |
460 | return false; | |
461 | } | |
462 | break; | |
463 | ||
464 | case LEFT_CURLY: { | |
465 | if (!parser.skip_token (LEFT_CURLY)) | |
466 | return false; | |
467 | } | |
468 | break; | |
469 | default: | |
470 | gcc_unreachable (); | |
471 | } | |
472 | ||
473 | const MacroInvocLexer &source = parser.get_token_source (); | |
474 | ||
475 | for (auto &match : matcher.get_matches ()) | |
476 | { | |
477 | size_t offs_begin = source.get_offs (); | |
478 | ||
479 | switch (match->get_macro_match_type ()) | |
480 | { | |
481 | case AST::MacroMatch::MacroMatchType::Fragment: { | |
482 | AST::MacroMatchFragment *fragment | |
483 | = static_cast<AST::MacroMatchFragment *> (match.get ()); | |
484 | if (!match_fragment (parser, *fragment)) | |
485 | return false; | |
486 | ||
487 | // matched fragment get the offset in the token stream | |
488 | size_t offs_end = source.get_offs (); | |
1fed030c AC |
489 | if (in_repetition) |
490 | sub_stack.append_fragment ( | |
491 | MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); | |
492 | else | |
493 | sub_stack.insert_metavar ( | |
494 | MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); | |
1841081a AC |
495 | } |
496 | break; | |
497 | ||
498 | case AST::MacroMatch::MacroMatchType::Tok: { | |
499 | AST::Token *tok = static_cast<AST::Token *> (match.get ()); | |
500 | if (!match_token (parser, *tok)) | |
501 | return false; | |
502 | } | |
503 | break; | |
504 | ||
505 | case AST::MacroMatch::MacroMatchType::Repetition: { | |
506 | AST::MacroMatchRepetition *rep | |
507 | = static_cast<AST::MacroMatchRepetition *> (match.get ()); | |
508 | if (!match_repetition (parser, *rep)) | |
509 | return false; | |
510 | } | |
511 | break; | |
512 | ||
513 | case AST::MacroMatch::MacroMatchType::Matcher: { | |
514 | AST::MacroMatcher *m | |
515 | = static_cast<AST::MacroMatcher *> (match.get ()); | |
516 | expansion_depth++; | |
1fed030c | 517 | if (!match_matcher (parser, *m, in_repetition)) |
1841081a AC |
518 | { |
519 | expansion_depth--; | |
520 | return false; | |
521 | } | |
522 | expansion_depth--; | |
523 | } | |
524 | break; | |
525 | } | |
526 | } | |
527 | ||
528 | switch (delimiter->get_id ()) | |
529 | { | |
530 | case LEFT_PAREN: { | |
531 | if (!parser.skip_token (RIGHT_PAREN)) | |
532 | return false; | |
533 | } | |
534 | break; | |
535 | ||
536 | case LEFT_SQUARE: { | |
537 | if (!parser.skip_token (RIGHT_SQUARE)) | |
538 | return false; | |
539 | } | |
540 | break; | |
541 | ||
542 | case LEFT_CURLY: { | |
543 | if (!parser.skip_token (RIGHT_CURLY)) | |
544 | return false; | |
545 | } | |
546 | break; | |
547 | default: | |
548 | gcc_unreachable (); | |
549 | } | |
550 | ||
551 | return true; | |
552 | } | |
553 | ||
554 | bool | |
555 | MacroExpander::match_token (Parser<MacroInvocLexer> &parser, AST::Token &token) | |
556 | { | |
557 | // FIXME this needs to actually match the content and the type | |
558 | return parser.skip_token (token.get_id ()); | |
559 | } | |
560 | ||
561 | bool | |
562 | MacroExpander::match_n_matches (Parser<MacroInvocLexer> &parser, | |
563 | AST::MacroMatchRepetition &rep, | |
564 | size_t &match_amount, size_t lo_bound, | |
565 | size_t hi_bound) | |
566 | { | |
567 | match_amount = 0; | |
568 | auto &matches = rep.get_matches (); | |
569 | ||
570 | const MacroInvocLexer &source = parser.get_token_source (); | |
571 | while (true) | |
572 | { | |
573 | // If the current token is a closing macro delimiter, break away. | |
574 | // TODO: Is this correct? | |
575 | auto t_id = parser.peek_current_token ()->get_id (); | |
576 | if (t_id == RIGHT_PAREN || t_id == RIGHT_SQUARE || t_id == RIGHT_CURLY) | |
577 | break; | |
578 | ||
579 | // Skip parsing a separator on the first match, otherwise consume it. | |
580 | // If it isn't present, this is an error | |
581 | if (rep.has_sep () && match_amount > 0) | |
582 | if (!match_token (parser, *rep.get_sep ())) | |
583 | break; | |
584 | ||
585 | bool valid_current_match = false; | |
586 | for (auto &match : matches) | |
587 | { | |
588 | size_t offs_begin = source.get_offs (); | |
589 | switch (match->get_macro_match_type ()) | |
590 | { | |
591 | case AST::MacroMatch::MacroMatchType::Fragment: { | |
592 | AST::MacroMatchFragment *fragment | |
593 | = static_cast<AST::MacroMatchFragment *> (match.get ()); | |
594 | valid_current_match = match_fragment (parser, *fragment); | |
595 | ||
596 | // matched fragment get the offset in the token stream | |
597 | size_t offs_end = source.get_offs (); | |
598 | ||
599 | // The main difference with match_matcher happens here: Instead | |
600 | // of inserting a new fragment, we append to one. If that | |
601 | // fragment does not exist, then the operation is similar to | |
602 | // `insert_fragment` with the difference that we are not | |
603 | // creating a metavariable, but a repetition of one, which is | |
604 | // really different. | |
605 | sub_stack.append_fragment ( | |
606 | MatchedFragment (fragment->get_ident (), offs_begin, | |
607 | offs_end)); | |
608 | } | |
609 | break; | |
610 | ||
611 | case AST::MacroMatch::MacroMatchType::Tok: { | |
612 | AST::Token *tok = static_cast<AST::Token *> (match.get ()); | |
613 | valid_current_match = match_token (parser, *tok); | |
614 | } | |
615 | break; | |
616 | ||
617 | case AST::MacroMatch::MacroMatchType::Repetition: { | |
618 | AST::MacroMatchRepetition *rep | |
619 | = static_cast<AST::MacroMatchRepetition *> (match.get ()); | |
620 | valid_current_match = match_repetition (parser, *rep); | |
621 | } | |
622 | break; | |
623 | ||
624 | case AST::MacroMatch::MacroMatchType::Matcher: { | |
625 | AST::MacroMatcher *m | |
626 | = static_cast<AST::MacroMatcher *> (match.get ()); | |
1fed030c | 627 | valid_current_match = match_matcher (parser, *m, true); |
1841081a AC |
628 | } |
629 | break; | |
630 | } | |
631 | } | |
632 | // If we've encountered an error once, stop trying to match more | |
633 | // repetitions | |
634 | if (!valid_current_match) | |
635 | break; | |
636 | ||
637 | match_amount++; | |
638 | ||
639 | // Break early if we notice there's too many expressions already | |
640 | if (hi_bound && match_amount > hi_bound) | |
641 | break; | |
642 | } | |
643 | ||
644 | // Check if the amount of matches we got is valid: Is it more than the lower | |
645 | // bound and less than the higher bound? | |
646 | bool did_meet_lo_bound = match_amount >= lo_bound; | |
647 | bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true; | |
648 | ||
649 | // If the end-result is valid, then we can clear the parse errors: Since | |
650 | // repetitions are parsed eagerly, it is okay to fail in some cases | |
651 | auto res = did_meet_lo_bound && did_meet_hi_bound; | |
652 | if (res) | |
653 | parser.clear_errors (); | |
654 | ||
655 | return res; | |
656 | } | |
657 | ||
658 | bool | |
659 | MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser, | |
660 | AST::MacroMatchRepetition &rep) | |
661 | { | |
662 | size_t match_amount = 0; | |
663 | bool res = false; | |
664 | ||
665 | std::string lo_str; | |
666 | std::string hi_str; | |
667 | switch (rep.get_op ()) | |
668 | { | |
669 | case AST::MacroMatchRepetition::MacroRepOp::ANY: | |
670 | lo_str = "0"; | |
671 | hi_str = "+inf"; | |
672 | res = match_n_matches (parser, rep, match_amount); | |
673 | break; | |
674 | case AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE: | |
675 | lo_str = "1"; | |
676 | hi_str = "+inf"; | |
677 | res = match_n_matches (parser, rep, match_amount, 1); | |
678 | break; | |
679 | case AST::MacroMatchRepetition::MacroRepOp::ZERO_OR_ONE: | |
680 | lo_str = "0"; | |
681 | hi_str = "1"; | |
682 | res = match_n_matches (parser, rep, match_amount, 0, 1); | |
683 | break; | |
684 | default: | |
685 | gcc_unreachable (); | |
686 | } | |
687 | ||
688 | if (!res) | |
689 | rust_error_at (rep.get_match_locus (), | |
690 | "invalid amount of matches for macro invocation. Expected " | |
691 | "between %s and %s, got %lu", | |
692 | lo_str.c_str (), hi_str.c_str (), | |
693 | (unsigned long) match_amount); | |
694 | ||
695 | rust_debug_loc (rep.get_match_locus (), "%s matched %lu times", | |
696 | res ? "successfully" : "unsuccessfully", | |
697 | (unsigned long) match_amount); | |
698 | ||
699 | // We have to handle zero fragments differently: They will not have been | |
700 | // "matched" but they are still valid and should be inserted as a special | |
701 | // case. So we go through the stack map, and for every fragment which doesn't | |
702 | // exist, insert a zero-matched fragment. | |
703 | auto &stack_map = sub_stack.peek (); | |
704 | for (auto &match : rep.get_matches ()) | |
705 | { | |
706 | if (match->get_macro_match_type () | |
707 | == AST::MacroMatch::MacroMatchType::Fragment) | |
708 | { | |
709 | auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ()); | |
710 | auto it = stack_map.find (fragment->get_ident ()); | |
711 | ||
712 | if (it == stack_map.end ()) | |
713 | sub_stack.insert_matches (fragment->get_ident (), | |
714 | MatchedFragmentContainer::zero ()); | |
715 | } | |
716 | } | |
717 | ||
718 | return res; | |
719 | } | |
720 | ||
721 | /** | |
722 | * Helper function to refactor calling a parsing function 0 or more times | |
723 | */ | |
724 | static AST::ASTFragment | |
725 | parse_many (Parser<MacroInvocLexer> &parser, TokenId &delimiter, | |
726 | std::function<AST::SingleASTNode ()> parse_fn) | |
727 | { | |
728 | std::vector<AST::SingleASTNode> nodes; | |
729 | while (true) | |
730 | { | |
731 | if (parser.peek_current_token ()->get_id () == delimiter) | |
732 | break; | |
733 | ||
734 | auto node = parse_fn (); | |
252216dd AC |
735 | if (node.is_error ()) |
736 | { | |
737 | for (auto err : parser.get_errors ()) | |
738 | err.emit_error (); | |
739 | ||
740 | return AST::ASTFragment::create_error (); | |
741 | } | |
742 | ||
1841081a AC |
743 | nodes.emplace_back (std::move (node)); |
744 | } | |
745 | ||
746 | return AST::ASTFragment (std::move (nodes)); | |
747 | } | |
748 | ||
749 | /** | |
750 | * Transcribe 0 or more items from a macro invocation | |
751 | * | |
752 | * @param parser Parser to extract items from | |
753 | * @param delimiter Id of the token on which parsing should stop | |
754 | */ | |
755 | static AST::ASTFragment | |
756 | transcribe_many_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter) | |
757 | { | |
758 | return parse_many (parser, delimiter, [&parser] () { | |
759 | auto item = parser.parse_item (true); | |
760 | return AST::SingleASTNode (std::move (item)); | |
761 | }); | |
762 | } | |
763 | ||
764 | /** | |
765 | * Transcribe 0 or more external items from a macro invocation | |
766 | * | |
767 | * @param parser Parser to extract items from | |
768 | * @param delimiter Id of the token on which parsing should stop | |
769 | */ | |
770 | static AST::ASTFragment | |
771 | transcribe_many_ext (Parser<MacroInvocLexer> &parser, TokenId &delimiter) | |
772 | { | |
773 | return parse_many (parser, delimiter, [&parser] () { | |
774 | auto item = parser.parse_external_item (); | |
775 | return AST::SingleASTNode (std::move (item)); | |
776 | }); | |
777 | } | |
778 | ||
779 | /** | |
780 | * Transcribe 0 or more trait items from a macro invocation | |
781 | * | |
782 | * @param parser Parser to extract items from | |
783 | * @param delimiter Id of the token on which parsing should stop | |
784 | */ | |
785 | static AST::ASTFragment | |
786 | transcribe_many_trait_items (Parser<MacroInvocLexer> &parser, | |
787 | TokenId &delimiter) | |
788 | { | |
789 | return parse_many (parser, delimiter, [&parser] () { | |
790 | auto item = parser.parse_trait_item (); | |
791 | return AST::SingleASTNode (std::move (item)); | |
792 | }); | |
793 | } | |
794 | ||
795 | /** | |
796 | * Transcribe 0 or more impl items from a macro invocation | |
797 | * | |
798 | * @param parser Parser to extract items from | |
799 | * @param delimiter Id of the token on which parsing should stop | |
800 | */ | |
801 | static AST::ASTFragment | |
802 | transcribe_many_impl_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter) | |
803 | { | |
804 | return parse_many (parser, delimiter, [&parser] () { | |
805 | auto item = parser.parse_inherent_impl_item (); | |
806 | return AST::SingleASTNode (std::move (item)); | |
807 | }); | |
808 | } | |
809 | ||
810 | /** | |
811 | * Transcribe 0 or more trait impl items from a macro invocation | |
812 | * | |
813 | * @param parser Parser to extract items from | |
814 | * @param delimiter Id of the token on which parsing should stop | |
815 | */ | |
816 | static AST::ASTFragment | |
817 | transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser, | |
818 | TokenId &delimiter) | |
819 | { | |
820 | return parse_many (parser, delimiter, [&parser] () { | |
821 | auto item = parser.parse_trait_impl_item (); | |
822 | return AST::SingleASTNode (std::move (item)); | |
823 | }); | |
824 | } | |
825 | ||
826 | /** | |
827 | * Transcribe 0 or more statements from a macro invocation | |
828 | * | |
829 | * @param parser Parser to extract statements from | |
830 | * @param delimiter Id of the token on which parsing should stop | |
831 | */ | |
832 | static AST::ASTFragment | |
833 | transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter) | |
834 | { | |
835 | auto restrictions = ParseRestrictions (); | |
836 | restrictions.consume_semi = false; | |
837 | ||
838 | // FIXME: This is invalid! It needs to also handle cases where the macro | |
839 | // transcriber is an expression, but since the macro call is followed by | |
840 | // a semicolon, it's a valid ExprStmt | |
841 | return parse_many (parser, delimiter, [&parser, restrictions] () { | |
842 | auto stmt = parser.parse_stmt (restrictions); | |
843 | return AST::SingleASTNode (std::move (stmt)); | |
844 | }); | |
845 | } | |
846 | ||
847 | /** | |
848 | * Transcribe one expression from a macro invocation | |
849 | * | |
850 | * @param parser Parser to extract statements from | |
851 | */ | |
852 | static AST::ASTFragment | |
853 | transcribe_expression (Parser<MacroInvocLexer> &parser) | |
854 | { | |
855 | auto expr = parser.parse_expr (); | |
856 | ||
857 | return AST::ASTFragment ({std::move (expr)}); | |
858 | } | |
859 | ||
860 | /** | |
861 | * Transcribe one type from a macro invocation | |
862 | * | |
863 | * @param parser Parser to extract statements from | |
864 | */ | |
865 | static AST::ASTFragment | |
866 | transcribe_type (Parser<MacroInvocLexer> &parser) | |
867 | { | |
55fb35c5 AC |
868 | auto type = parser.parse_type (true); |
869 | for (auto err : parser.get_errors ()) | |
870 | err.emit_error (); | |
1841081a AC |
871 | |
872 | return AST::ASTFragment ({std::move (type)}); | |
873 | } | |
874 | ||
875 | static AST::ASTFragment | |
876 | transcribe_on_delimiter (Parser<MacroInvocLexer> &parser, bool semicolon, | |
877 | AST::DelimType delimiter, TokenId last_token_id) | |
878 | { | |
879 | if (semicolon || delimiter == AST::DelimType::CURLY) | |
880 | return transcribe_many_stmts (parser, last_token_id); | |
881 | else | |
882 | return transcribe_expression (parser); | |
883 | } // namespace Rust | |
884 | ||
885 | static AST::ASTFragment | |
886 | transcribe_context (MacroExpander::ContextType ctx, | |
887 | Parser<MacroInvocLexer> &parser, bool semicolon, | |
888 | AST::DelimType delimiter, TokenId last_token_id) | |
889 | { | |
890 | // The flow-chart in order to choose a parsing function is as follows: | |
891 | // | |
892 | // [switch special context] | |
893 | // -- Item --> parser.parse_item(); | |
894 | // -- Trait --> parser.parse_trait_item(); | |
895 | // -- Impl --> parser.parse_impl_item(); | |
896 | // -- Extern --> parser.parse_extern_item(); | |
897 | // -- None --> [has semicolon?] | |
898 | // -- Yes --> parser.parse_stmt(); | |
899 | // -- No --> [switch invocation.delimiter()] | |
900 | // -- { } --> parser.parse_stmt(); | |
901 | // -- _ --> parser.parse_expr(); // once! | |
902 | ||
903 | // If there is a semicolon OR we are expanding a MacroInvocationSemi, then | |
904 | // we can parse multiple items. Otherwise, parse *one* expression | |
905 | ||
906 | switch (ctx) | |
907 | { | |
908 | case MacroExpander::ContextType::ITEM: | |
909 | return transcribe_many_items (parser, last_token_id); | |
910 | break; | |
911 | case MacroExpander::ContextType::TRAIT: | |
912 | return transcribe_many_trait_items (parser, last_token_id); | |
913 | break; | |
914 | case MacroExpander::ContextType::IMPL: | |
915 | return transcribe_many_impl_items (parser, last_token_id); | |
916 | break; | |
917 | case MacroExpander::ContextType::TRAIT_IMPL: | |
918 | return transcribe_many_trait_impl_items (parser, last_token_id); | |
919 | break; | |
920 | case MacroExpander::ContextType::EXTERN: | |
921 | return transcribe_many_ext (parser, last_token_id); | |
922 | break; | |
923 | case MacroExpander::ContextType::TYPE: | |
924 | return transcribe_type (parser); | |
925 | break; | |
926 | default: | |
927 | return transcribe_on_delimiter (parser, semicolon, delimiter, | |
928 | last_token_id); | |
929 | } | |
930 | } | |
931 | ||
932 | static std::string | |
933 | tokens_to_str (std::vector<std::unique_ptr<AST::Token>> &tokens) | |
934 | { | |
935 | std::string str; | |
936 | if (!tokens.empty ()) | |
937 | { | |
938 | str += tokens[0]->as_string (); | |
939 | for (size_t i = 1; i < tokens.size (); i++) | |
940 | str += " " + tokens[i]->as_string (); | |
941 | } | |
942 | ||
943 | return str; | |
944 | } | |
945 | ||
946 | AST::ASTFragment | |
947 | MacroExpander::transcribe_rule ( | |
948 | AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, | |
949 | std::map<std::string, MatchedFragmentContainer> &matched_fragments, | |
950 | bool semicolon, ContextType ctx) | |
951 | { | |
952 | // we can manipulate the token tree to substitute the dollar identifiers so | |
953 | // that when we call parse its already substituted for us | |
954 | AST::MacroTranscriber &transcriber = match_rule.get_transcriber (); | |
955 | AST::DelimTokenTree &transcribe_tree = transcriber.get_token_tree (); | |
956 | ||
957 | auto invoc_stream = invoc_token_tree.to_token_stream (); | |
958 | auto macro_rule_tokens = transcribe_tree.to_token_stream (); | |
959 | ||
960 | auto substitute_context | |
961 | = SubstituteCtx (invoc_stream, macro_rule_tokens, matched_fragments); | |
962 | std::vector<std::unique_ptr<AST::Token>> substituted_tokens | |
963 | = substitute_context.substitute_tokens (); | |
964 | ||
965 | rust_debug ("substituted tokens: %s", | |
966 | tokens_to_str (substituted_tokens).c_str ()); | |
967 | ||
968 | // parse it to an ASTFragment | |
969 | MacroInvocLexer lex (std::move (substituted_tokens)); | |
970 | Parser<MacroInvocLexer> parser (lex); | |
971 | ||
972 | auto last_token_id = TokenId::RIGHT_CURLY; | |
973 | ||
974 | // this is used so we can check that we delimit the stream correctly. | |
975 | switch (transcribe_tree.get_delim_type ()) | |
976 | { | |
977 | case AST::DelimType::PARENS: | |
978 | last_token_id = TokenId::RIGHT_PAREN; | |
979 | rust_assert (parser.skip_token (LEFT_PAREN)); | |
980 | break; | |
981 | ||
982 | case AST::DelimType::CURLY: | |
983 | rust_assert (parser.skip_token (LEFT_CURLY)); | |
984 | break; | |
985 | ||
986 | case AST::DelimType::SQUARE: | |
987 | last_token_id = TokenId::RIGHT_SQUARE; | |
988 | rust_assert (parser.skip_token (LEFT_SQUARE)); | |
989 | break; | |
990 | } | |
991 | ||
992 | // see https://github.com/Rust-GCC/gccrs/issues/22 | |
993 | // TL;DR: | |
994 | // - Treat all macro invocations with parentheses, (), or square brackets, | |
995 | // [], as expressions. | |
996 | // - If the macro invocation has curly brackets, {}, it may be parsed as a | |
997 | // statement depending on the context. | |
998 | // - If the macro invocation has a semicolon at the end, it must be parsed | |
999 | // as a statement (either via ExpressionStatement or | |
1000 | // MacroInvocationWithSemi) | |
1001 | ||
1002 | auto fragment | |
1003 | = transcribe_context (ctx, parser, semicolon, | |
1004 | invoc_token_tree.get_delim_type (), last_token_id); | |
1005 | ||
1006 | // emit any errors | |
1007 | if (parser.has_errors ()) | |
1008 | { | |
1009 | for (auto &err : parser.get_errors ()) | |
1010 | rust_error_at (err.locus, "%s", err.message.c_str ()); | |
1011 | return AST::ASTFragment::create_error (); | |
1012 | } | |
1013 | ||
1014 | // are all the tokens used? | |
1015 | bool did_delimit = parser.skip_token (last_token_id); | |
1016 | ||
1017 | bool reached_end_of_stream = did_delimit && parser.skip_token (END_OF_FILE); | |
1018 | if (!reached_end_of_stream) | |
1019 | { | |
1020 | const_TokenPtr current_token = parser.peek_current_token (); | |
1021 | rust_error_at (current_token->get_locus (), | |
1022 | "tokens here and after are unparsed"); | |
1023 | } | |
1024 | ||
1025 | return fragment; | |
1026 | } | |
1027 | } // namespace Rust |