// Copyright (C) 2020-2025 Free Software Foundation, Inc. // This file is part of GCC. // GCC is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free // Software Foundation; either version 3, or (at your option) any later // version. // GCC is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // You should have received a copy of the GNU General Public License // along with GCC; see the file COPYING3. If not see // . #ifndef RUST_NAME_RESOLVER_2_0_H #define RUST_NAME_RESOLVER_2_0_H #include "optional.h" #include "rust-forever-stack.h" #include "rust-hir-map.h" #include "rust-rib.h" #include "rust-stacked-contexts.h" #include "rust-item.h" namespace Rust { namespace Resolver2_0 { // TODO: Add missing mappings and data structures /** The data structures we need to develop need to fill in a few roles - like the original name resolver, they need to be accessible at multiple points during the pipeline to allow compiler passes such as macro expansion or typechecking to benefit from them. Unlike the original name resolution, these data structures need to be created by multiple compiler passes: Whereas the original name resolution of gccrs tries to perform name resolution in a single pass, it fails at properly handling more complex name resolution cases such as macro name resolution, imports in general, and glob imports in particular. The goal of this new name resolution algorithm is to split the name resolution in at least two passes - `Early` name resolution, which takes care of macro name resolution and import resolution, and `Late` name resolution - your typical name resolution, for types, functions, variables... 1. `Early` The Early name resolution is tied in snuggly with macro expansion: macro expansion cannot happen without some form of name resolution (pointing an invocation to its definition) but may also *depend* on name resolution (a macro generating another macro... or importing items... and funny other cases like these). It needs to work in a fixed-point fashion alongside macro expansion: While there are imports to resolve, or macros to expand, we need to keep going and resolve them. This is achieved, among other things, by a top-level name resolution pass in charge of collection use statements and macro definitions (as well as Items, which will be useful for later passes of the name resolution). This top-level pass exists because Rust enables you to call a function before having declared it (at a lexical level, i.e calling `f(15)` at line 3 while the `f` function is declared at line 1499). This Early pass needs to build the first part of our "resolution map", which will then be used in multiple contexts: 1. The MacroExpander, in a read-only fashion: fetching macro definitions for each invocation and performing the expansion. 2. `Late`, which will write more data inside that resolution map, and use it to perform its name resolution too. This is where the first challenge of this data structure lies: The existing data structures and name resolution algorithm relies on the name resolution pass happening just once. In typical name resolution fashion, when it sees a lexical scope (a new module, a function's block, a block expression...), it "pushes" a new "Scope" to a stack of these scopes, and "pops" it when exiting said lexical scope. However, because we are splitting the name resolution into two passes, we would like to avoid re-doing a bunch of work we've already done - which is why this data structure needs to allow "re-entrancy", or to at least not keep as much state as the existing one, and allow for viewing the same module multiple times without throwing a fit. We will be implementing a "forever stack" of scopes, which allows the user the pushing of new scopes onto the stack, but only simulates the popping of a scope: When pushing new scopes, more space is allocated on our stack, and we keep track of this scope as being the current one - however, when popping this scope, we do not actually delete the memory associated with it: we simply mark the previous scope (parent) as the current one. In the example below, each number indicates the "state" of our resolution map, and the carret is used to point to the current lexical scope. ```rust // [] // fn main() { // [ `main` scope: {} ] // ^ let a = 15; // [ `main` scope: { Decl(a) } ] // ^ { _PUSH_ // [ `main` scope: { Decl(a) }, anonymous scope: {} ] // ^ let a = 16; // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ] // ^ f(a); // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ] // ^ } _POP_ // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ] // ^ f(a); // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ] // ^ } ``` This allows us to revisit scopes previously visited in later phases of the name resolution, and add more information if necessary. 2. `Late` `Late` name resolution possesses some unique challenges since Rust's name resolution rules are extremely complex - variable shadowing, variable capture in closures (but not inner functions!)... You can have a look at a fucked up example here: https://rustc-dev-guide.rust-lang.org/name-resolution.html#scopes-and-ribs This requires us to think about what exactly to put in our `Scope`s and what to do with our `Rib`s - and how it affects our data structures. For example, in the above example, `rustc` demonstrates how multiple `Rib`s can be created inside of a single lexical scope for variables, as the Rust programming language allows shadowing. TODO: Mention macro hygiene and that it is the same TODO: How does this affect our data structures? TODO: Last challenge - reuse the same APIs to allow the typechecker to not change? TODO: Mention that ForeverStack is templated to make sure that behavior is correct */ // FIXME: Documentation class Usage { public: explicit Usage (NodeId id) : id (id) {} // TODO: move to name-resolution-ctx.cc // storing it as a key in a map bool operator< (const Usage other) const { return other.id < id; } NodeId id; }; // FIXME: Documentation class Definition { public: explicit Definition (NodeId id) : id (id) {} NodeId id; }; struct IdentifierMode { bool is_ref; bool is_mut; IdentifierMode (bool is_ref, bool is_mut) : is_ref (is_ref), is_mut (is_mut) {} bool operator== (const IdentifierMode &other) { return other.is_ref == is_ref && other.is_mut == is_mut; } bool operator!= (const IdentifierMode &other) { return !(*this == other); } }; struct Binding { enum class Kind { Product, Or, } kind; // used to check the correctness of or-bindings bool has_expected_bindings; std::unordered_map> idents; Binding (Binding::Kind kind) : kind (kind), has_expected_bindings (false) {} }; /** * Used to identify the source of a binding, and emit the correct error message. */ enum class BindingSource { Match, Let, IfLet, WhileLet, For, /* Closure param or function param */ Param }; class BindingLayer { BindingSource source; std::vector bindings; bool bind_test (Identifier ident, Binding::Kind kind); public: void push (Binding::Kind kind); BindingLayer (BindingSource source); /** * Identifies if the identifier has been used in a product binding context. * eg. `let (a, a) = test();` */ bool is_and_bound (Identifier ident); /** * Identifies if the identifier has been used in a or context. * eg. `let (a, 1) | (a, 2) = test()` */ bool is_or_bound (Identifier ident); void insert_ident (std::string ident, location_t locus, bool is_ref, bool is_mut); void merge (); BindingSource get_source () const; }; class NameResolutionContext; /* * Used to handle canonical paths * Similar to ForeverStack, but namespace independent and more specialized */ class CanonicalPathRecord { public: virtual Resolver::CanonicalPath as_path (const NameResolutionContext &) = 0; virtual bool is_root () const = 0; virtual ~CanonicalPathRecord () = default; }; class CanonicalPathRecordWithParent : public CanonicalPathRecord { public: CanonicalPathRecordWithParent (CanonicalPathRecord &parent) : parent (&parent) {} CanonicalPathRecord &get_parent () { return *parent; } bool is_root () const override final { return false; } private: CanonicalPathRecord *parent; }; class CanonicalPathRecordCrateRoot : public CanonicalPathRecord { public: CanonicalPathRecordCrateRoot (NodeId node_id, std::string seg) : node_id (node_id), seg (std::move (seg)) { rust_assert (Analysis::Mappings::get ().node_is_crate (node_id)); crate_num = Analysis::Mappings::get ().lookup_crate_num (node_id).value (); } Resolver::CanonicalPath as_path (const NameResolutionContext &) override; bool is_root () const override final { return true; } private: NodeId node_id; CrateNum crate_num; std::string seg; }; class CanonicalPathRecordNormal : public CanonicalPathRecordWithParent { public: CanonicalPathRecordNormal (CanonicalPathRecord &parent, NodeId node_id, std::string seg) : CanonicalPathRecordWithParent (parent), node_id (node_id), seg (std::move (seg)) { rust_assert (!Analysis::Mappings::get ().node_is_crate (node_id)); } Resolver::CanonicalPath as_path (const NameResolutionContext &) override; private: NodeId node_id; std::string seg; }; class CanonicalPathRecordLookup : public CanonicalPathRecord { public: CanonicalPathRecordLookup (NodeId lookup_id) : lookup_id (lookup_id), cache (nullptr) {} Resolver::CanonicalPath as_path (const NameResolutionContext &) override; bool is_root () const override final { return true; } private: NodeId lookup_id; CanonicalPathRecord *cache; }; class CanonicalPathRecordImpl : public CanonicalPathRecordWithParent { public: CanonicalPathRecordImpl (CanonicalPathRecord &parent, NodeId impl_id, NodeId type_id) : CanonicalPathRecordWithParent (parent), impl_id (impl_id), type_record (type_id) {} Resolver::CanonicalPath as_path (const NameResolutionContext &) override; private: NodeId impl_id; CanonicalPathRecordLookup type_record; }; class CanonicalPathRecordTraitImpl : public CanonicalPathRecordWithParent { public: CanonicalPathRecordTraitImpl (CanonicalPathRecord &parent, NodeId impl_id, NodeId type_id, NodeId trait_path_id) : CanonicalPathRecordWithParent (parent), impl_id (impl_id), type_record (type_id), trait_path_record (trait_path_id) {} Resolver::CanonicalPath as_path (const NameResolutionContext &) override; private: NodeId impl_id; CanonicalPathRecordLookup type_record; CanonicalPathRecordLookup trait_path_record; }; class CanonicalPathCtx { public: CanonicalPathCtx (const NameResolutionContext &ctx) : current_record (nullptr), nr_ctx (&ctx) {} Resolver::CanonicalPath get_path (NodeId id) const { return get_record (id).as_path (*nr_ctx); } CanonicalPathRecord &get_record (NodeId id) const { auto it = records.find (id); rust_assert (it != records.end ()); return *it->second; } tl::optional get_record_opt (NodeId id) const { auto it = records.find (id); if (it == records.end ()) return tl::nullopt; else return it->second.get (); } void insert_record (NodeId id, const Identifier &ident) { insert_record (id, ident.as_string ()); } void insert_record (NodeId id, std::string seg) { rust_assert (current_record != nullptr); auto it = records.find (id); if (it == records.end ()) { auto record = new CanonicalPathRecordNormal (*current_record, id, std::move (seg)); bool ok = records.emplace (id, std::unique_ptr (record)) .second; rust_assert (ok); } } template void scope (NodeId id, const Identifier &ident, F &&f) { scope (id, ident.as_string (), std::forward (f)); } template void scope (NodeId id, std::string seg, F &&f) { rust_assert (current_record != nullptr); scope_inner (id, std::forward (f), [this, id, &seg] () { return new CanonicalPathRecordNormal (*current_record, id, std::move (seg)); }); } template void scope_impl (AST::InherentImpl &impl, F &&f) { rust_assert (current_record != nullptr); NodeId id = impl.get_node_id (); scope_inner (id, std::forward (f), [this, id, &impl] () { return new CanonicalPathRecordImpl (*current_record, id, impl.get_type ().get_node_id ()); }); } template void scope_impl (AST::TraitImpl &impl, F &&f) { rust_assert (current_record != nullptr); NodeId id = impl.get_node_id (); scope_inner (id, std::forward (f), [this, id, &impl] () { return new CanonicalPathRecordTraitImpl ( *current_record, id, impl.get_type ().get_node_id (), impl.get_trait_path ().get_node_id ()); }); } template void scope_crate (NodeId node_id, std::string crate_name, F &&f) { scope_inner (node_id, std::forward (f), [node_id, &crate_name] () { return new CanonicalPathRecordCrateRoot (node_id, std::move (crate_name)); }); } private: template void scope_inner (NodeId id, FCallback &&f_callback, FCreate &&f_create) { auto it = records.find (id); if (it == records.end ()) { CanonicalPathRecord *record = std::forward (f_create) (); it = records.emplace (id, std::unique_ptr (record)) .first; } rust_assert (it->second->is_root () || &static_cast (*it->second) .get_parent () == current_record); CanonicalPathRecord *stash = it->second.get (); std::swap (stash, current_record); std::forward (f_callback) (); std::swap (stash, current_record); } std::unordered_map> records; CanonicalPathRecord *current_record; const NameResolutionContext *nr_ctx; }; // Now our resolver, which keeps track of all the `ForeverStack`s we could want class NameResolutionContext { public: NameResolutionContext (); /** * Insert a new value in the current rib. * * @param name Name of the value to insert. * @param id This value's ID, e.g the function definition's node ID. * @param ns Namespace in which to insert the value. */ tl::expected insert (Identifier name, NodeId id, Namespace ns); tl::expected insert_variant (Identifier name, NodeId id); tl::expected insert_shadowable (Identifier name, NodeId id, Namespace ns); tl::expected insert_globbed (Identifier name, NodeId id, Namespace ns); /** * Run a lambda in a "scoped" context, meaning that a new `Rib` will be pushed * before executing the lambda and then popped. This is useful for all kinds * of scope in the language, such as a block expression or when entering a * function. This variant of the function enters a new scope in *all* * namespaces, while the second variant enters a scope in *one* namespace. * * @param rib_kind New `Rib` to create when entering this scope. A function * `Rib`, or an item `Rib`... etc * @param scope_id node ID of the scope we are entering, e.g the block's * `NodeId`. * @param lambda Function to run within that scope * @param path Optional path of the scope. This is useful for scopes which * affect path resolution, such as modules. Defaults to an empty * option. */ // FIXME: Do we want to handle something in particular for expected within the // scoped lambda? void scoped (Rib::Kind rib_kind, NodeId scope_id, std::function lambda, tl::optional path = {}); void scoped (Rib::Kind rib_kind, Namespace ns, NodeId scope_id, std::function lambda, tl::optional path = {}); ForeverStack values; ForeverStack types; ForeverStack macros; ForeverStack labels; Analysis::Mappings &mappings; StackedContexts bindings; CanonicalPathCtx canonical_ctx; // TODO: Rename // TODO: Use newtype pattern for Usage and Definition void map_usage (Usage usage, Definition definition); tl::optional lookup (NodeId usage) const; Resolver::CanonicalPath to_canonical_path (NodeId id) const { return canonical_ctx.get_path (id); } template tl::optional resolve_path (const std::vector &segments, ResolutionMode mode, std::vector &collect_errors, Namespace ns) { std::function insert_segment_resolution = [this] (const S &seg, NodeId id) { auto seg_id = unwrap_segment_node_id (seg); if (resolved_nodes.find (Usage (seg_id)) == resolved_nodes.end ()) map_usage (Usage (seg_id), Definition (id)); }; switch (ns) { case Namespace::Values: return values.resolve_path (segments, mode, insert_segment_resolution, collect_errors); case Namespace::Types: return types.resolve_path (segments, mode, insert_segment_resolution, collect_errors); case Namespace::Macros: return macros.resolve_path (segments, mode, insert_segment_resolution, collect_errors); case Namespace::Labels: return labels.resolve_path (segments, mode, insert_segment_resolution, collect_errors); default: rust_unreachable (); } } template tl::optional resolve_path (const std::vector &segments, ResolutionMode mode, tl::optional &> collect_errors, Namespace ns_first, Args... ns_args) { std::initializer_list namespaces = {ns_first, ns_args...}; for (auto ns : namespaces) { std::vector collect_errors_inner; if (auto ret = resolve_path (segments, mode, collect_errors_inner, ns)) return ret; if (!collect_errors_inner.empty ()) { if (collect_errors.has_value ()) { std::move (collect_errors_inner.begin (), collect_errors_inner.end (), std::back_inserter (collect_errors.value ())); } else { for (auto &e : collect_errors_inner) e.emit (); } return tl::nullopt; } } return tl::nullopt; } template tl::optional resolve_path (const std::vector &path_segments, bool has_opening_scope_resolution, tl::optional &> collect_errors, Namespace ns_first, Args... ns_args) { auto mode = ResolutionMode::Normal; if (has_opening_scope_resolution) { if (get_rust_edition () == Edition::E2015) mode = ResolutionMode::FromRoot; else mode = ResolutionMode::FromExtern; } return resolve_path (path_segments, mode, collect_errors, ns_first, ns_args...); } template tl::optional resolve_path (const std::vector &path_segments, bool has_opening_scope_resolution, Namespace ns_first, Args... ns_args) { return resolve_path (path_segments, has_opening_scope_resolution, tl::nullopt, ns_first, ns_args...); } template tl::optional resolve_path (const std::vector &path_segments, ResolutionMode mode, Namespace ns_first, Args... ns_args) { return resolve_path (path_segments, mode, tl::nullopt, ns_first, ns_args...); } template tl::optional resolve_path (const AST::SimplePath &path, Args &&...args) { return resolve_path (path.get_segments (), path.has_opening_scope_resolution (), std::forward (args)...); } template tl::optional resolve_path (const AST::PathInExpression &path, Args &&...args) { return resolve_path (path.get_segments (), path.opening_scope_resolution (), std::forward (args)...); } template tl::optional resolve_path (const AST::TypePath &path, Args &&...args) { return resolve_path (path.get_segments (), path.has_opening_scope_resolution_op (), std::forward (args)...); } private: /* Map of "usage" nodes which have been resolved to a "definition" node */ std::map resolved_nodes; }; } // namespace Resolver2_0 } // namespace Rust #endif // ! RUST_NAME_RESOLVER_2_0_H