| // Copyright 2024 The Abseil Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "absl/debugging/internal/demangle_rust.h" |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstring> |
| #include <limits> |
| |
| #include "absl/base/attributes.h" |
| #include "absl/base/config.h" |
| #include "absl/debugging/internal/decode_rust_punycode.h" |
| |
| namespace absl { |
| ABSL_NAMESPACE_BEGIN |
| namespace debugging_internal { |
| |
| namespace { |
| |
| // Same step limit as the C++ demangler in demangle.cc uses. |
| constexpr int kMaxReturns = 1 << 17; |
| |
| bool IsDigit(char c) { return '0' <= c && c <= '9'; } |
| bool IsLower(char c) { return 'a' <= c && c <= 'z'; } |
| bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; } |
| bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); } |
| bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; } |
| bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); } |
| |
| const char* BasicTypeName(char c) { |
| switch (c) { |
| case 'a': return "i8"; |
| case 'b': return "bool"; |
| case 'c': return "char"; |
| case 'd': return "f64"; |
| case 'e': return "str"; |
| case 'f': return "f32"; |
| case 'h': return "u8"; |
| case 'i': return "isize"; |
| case 'j': return "usize"; |
| case 'l': return "i32"; |
| case 'm': return "u32"; |
| case 'n': return "i128"; |
| case 'o': return "u128"; |
| case 'p': return "_"; |
| case 's': return "i16"; |
| case 't': return "u16"; |
| case 'u': return "()"; |
| case 'v': return "..."; |
| case 'x': return "i64"; |
| case 'y': return "u64"; |
| case 'z': return "!"; |
| } |
| return nullptr; |
| } |
| |
| // Parser for Rust symbol mangling v0, whose grammar is defined here: |
| // |
| // https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary |
| class RustSymbolParser { |
| public: |
| // Prepares to demangle the given encoding, a Rust symbol name starting with |
| // _R, into the output buffer [out, out_end). The caller is expected to |
| // continue by calling the new object's Parse function. |
| RustSymbolParser(const char* encoding, char* out, char* const out_end) |
| : encoding_(encoding), out_(out), out_end_(out_end) { |
| if (out_ != out_end_) *out_ = '\0'; |
| } |
| |
| // Parses the constructor's encoding argument, writing output into the range |
| // [out, out_end). Returns true on success and false for input whose |
| // structure was not recognized or exceeded implementation limits, such as by |
| // nesting structures too deep. In either case *this should not be used |
| // again. |
| ABSL_MUST_USE_RESULT bool Parse() && { |
| // Recursively parses the grammar production named by callee, then resumes |
| // execution at the next statement. |
| // |
| // Recursive-descent parsing is a beautifully readable translation of a |
| // grammar, but it risks stack overflow if implemented by naive recursion on |
| // the C++ call stack. So we simulate recursion by goto and switch instead, |
| // keeping a bounded stack of "return addresses" in the recursion_stack_ |
| // member. |
| // |
| // The callee argument is a statement label. We goto that label after |
| // saving the "return address" on recursion_stack_. The next continue |
| // statement in the for loop below "returns" from this "call". |
| // |
| // The caller argument names the return point. Each value of caller must |
| // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the |
| // definition of enum ReturnAddress. The switch implements the control |
| // transfer from the end of a "called" subroutine back to the statement |
| // after the "call". |
| // |
| // Note that not all the grammar productions have to be packed into the |
| // switch, but only those which appear in a cycle in the grammar. Anything |
| // acyclic can be written as ordinary functions and function calls, e.g., |
| // ParseIdentifier. |
| #define ABSL_DEMANGLER_RECURSE(callee, caller) \ |
| do { \ |
| if (recursion_depth_ == kStackSize) return false; \ |
| /* The next continue will switch on this saved value ... */ \ |
| recursion_stack_[recursion_depth_++] = caller; \ |
| goto callee; \ |
| /* ... and will land here, resuming the suspended code. */ \ |
| case caller: {} \ |
| } while (0) |
| |
| // Parse the encoding, counting completed recursive calls to guard against |
| // excessively complex input and infinite-loop bugs. |
| int iter = 0; |
| goto whole_encoding; |
| for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) { |
| // This switch resumes the code path most recently suspended by |
| // ABSL_DEMANGLER_RECURSE. |
| switch (recursion_stack_[--recursion_depth_]) { |
| // |
| // symbol-name -> |
| // _R decimal-number? path instantiating-crate? vendor-specific-suffix? |
| whole_encoding: |
| if (!Eat('_') || !Eat('R')) return false; |
| // decimal-number? is always empty today, so proceed to path, which |
| // can't start with a decimal digit. |
| ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate); |
| if (IsAlpha(Peek())) { |
| ++silence_depth_; // Print nothing more from here on. |
| ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix); |
| } |
| switch (Take()) { |
| case '.': case '$': case '\0': return true; |
| } |
| return false; // unexpected trailing content |
| |
| // path -> crate-root | inherent-impl | trait-impl | trait-definition | |
| // nested-path | generic-args | backref |
| // |
| // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch |
| // (which would hide the generated case label). Thus we jump out of the |
| // inner switch with gotos before performing any fake recursion. |
| path: |
| switch (Take()) { |
| case 'C': goto crate_root; |
| case 'M': goto inherent_impl; |
| case 'X': goto trait_impl; |
| case 'Y': goto trait_definition; |
| case 'N': goto nested_path; |
| case 'I': goto generic_args; |
| case 'B': goto path_backref; |
| default: return false; |
| } |
| |
| // crate-root -> C identifier (C consumed above) |
| crate_root: |
| if (!ParseIdentifier()) return false; |
| continue; |
| |
| // inherent-impl -> M impl-path type (M already consumed) |
| inherent_impl: |
| if (!Emit("<")) return false; |
| ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType); |
| ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding); |
| if (!Emit(">")) return false; |
| continue; |
| |
| // trait-impl -> X impl-path type path (X already consumed) |
| trait_impl: |
| if (!Emit("<")) return false; |
| ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType); |
| ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix); |
| if (!Emit(" as ")) return false; |
| ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding); |
| if (!Emit(">")) return false; |
| continue; |
| |
| // impl-path -> disambiguator? path (but never print it!) |
| impl_path: |
| ++silence_depth_; |
| { |
| int ignored_disambiguator; |
| if (!ParseDisambiguator(ignored_disambiguator)) return false; |
| } |
| ABSL_DEMANGLER_RECURSE(path, kImplPathEnding); |
| --silence_depth_; |
| continue; |
| |
| // trait-definition -> Y type path (Y already consumed) |
| trait_definition: |
| if (!Emit("<")) return false; |
| ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix); |
| if (!Emit(" as ")) return false; |
| ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding); |
| if (!Emit(">")) return false; |
| continue; |
| |
| // nested-path -> N namespace path identifier (N already consumed) |
| // namespace -> lower | upper |
| nested_path: |
| // Uppercase namespaces must be saved on a stack so we can print |
| // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed. |
| if (IsUpper(Peek())) { |
| if (!PushNamespace(Take())) return false; |
| ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace); |
| if (!Emit("::")) return false; |
| if (!ParseIdentifier(PopNamespace())) return false; |
| continue; |
| } |
| |
| // Lowercase namespaces, however, are never represented in the output; |
| // they all emit just ::name. |
| if (IsLower(Take())) { |
| ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace); |
| if (!Emit("::")) return false; |
| if (!ParseIdentifier()) return false; |
| continue; |
| } |
| |
| // Neither upper or lower |
| return false; |
| |
| // type -> basic-type | array-type | slice-type | tuple-type | |
| // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type | |
| // fn-type | dyn-trait-type | path | backref |
| // |
| // We use ifs instead of switch (Take()) because the default case jumps |
| // to path, which will need to see the first character not yet Taken |
| // from the input. Because we do not use a nested switch here, |
| // ABSL_DEMANGLER_RECURSE works fine in the 'S' case. |
| type: |
| if (IsLower(Peek())) { |
| const char* type_name = BasicTypeName(Take()); |
| if (type_name == nullptr || !Emit(type_name)) return false; |
| continue; |
| } |
| if (Eat('A')) { |
| // array-type = A type const |
| if (!Emit("[")) return false; |
| ABSL_DEMANGLER_RECURSE(type, kArraySize); |
| if (!Emit("; ")) return false; |
| ABSL_DEMANGLER_RECURSE(constant, kFinishArray); |
| if (!Emit("]")) return false; |
| continue; |
| } |
| if (Eat('S')) { |
| if (!Emit("[")) return false; |
| ABSL_DEMANGLER_RECURSE(type, kSliceEnding); |
| if (!Emit("]")) return false; |
| continue; |
| } |
| if (Eat('T')) goto tuple_type; |
| if (Eat('R')) { |
| if (!Emit("&")) return false; |
| if (!ParseOptionalLifetime()) return false; |
| goto type; |
| } |
| if (Eat('Q')) { |
| if (!Emit("&mut ")) return false; |
| if (!ParseOptionalLifetime()) return false; |
| goto type; |
| } |
| if (Eat('P')) { |
| if (!Emit("*const ")) return false; |
| goto type; |
| } |
| if (Eat('O')) { |
| if (!Emit("*mut ")) return false; |
| goto type; |
| } |
| if (Eat('F')) goto fn_type; |
| if (Eat('D')) goto dyn_trait_type; |
| if (Eat('B')) goto type_backref; |
| goto path; |
| |
| // tuple-type -> T type* E (T already consumed) |
| tuple_type: |
| if (!Emit("(")) return false; |
| |
| // The toolchain should call the unit type u instead of TE, but the |
| // grammar and other demanglers also recognize TE, so we do too. |
| if (Eat('E')) { |
| if (!Emit(")")) return false; |
| continue; |
| } |
| |
| // A tuple with one element is rendered (type,) instead of (type). |
| ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement); |
| if (Eat('E')) { |
| if (!Emit(",)")) return false; |
| continue; |
| } |
| |
| // A tuple with two elements is of course (x, y). |
| if (!Emit(", ")) return false; |
| ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement); |
| if (Eat('E')) { |
| if (!Emit(")")) return false; |
| continue; |
| } |
| |
| // And (x, y, z) for three elements. |
| if (!Emit(", ")) return false; |
| ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement); |
| if (Eat('E')) { |
| if (!Emit(")")) return false; |
| continue; |
| } |
| |
| // For longer tuples we write (x, y, z, ...), printing none of the |
| // content of the fourth and later types. Thus we avoid exhausting |
| // output buffers and human readers' patience when some library has a |
| // long tuple as an implementation detail, without having to |
| // completely obfuscate all tuples. |
| if (!Emit(", ...)")) return false; |
| ++silence_depth_; |
| while (!Eat('E')) { |
| ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement); |
| } |
| --silence_depth_; |
| continue; |
| |
| // fn-type -> F fn-sig (F already consumed) |
| // fn-sig -> binder? U? (K abi)? type* E type |
| // abi -> C | undisambiguated-identifier |
| // |
| // We follow the C++ demangler in suppressing details of function |
| // signatures. Every function type is rendered "fn...". |
| fn_type: |
| if (!Emit("fn...")) return false; |
| ++silence_depth_; |
| if (!ParseOptionalBinder()) return false; |
| (void)Eat('U'); |
| if (Eat('K')) { |
| if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false; |
| } |
| while (!Eat('E')) { |
| ABSL_DEMANGLER_RECURSE(type, kContinueParameterList); |
| } |
| ABSL_DEMANGLER_RECURSE(type, kFinishFn); |
| --silence_depth_; |
| continue; |
| |
| // dyn-trait-type -> D dyn-bounds lifetime (D already consumed) |
| // dyn-bounds -> binder? dyn-trait* E |
| // |
| // The grammar strangely allows an empty trait list, even though the |
| // compiler should never output one. We follow existing demanglers in |
| // rendering DEL_ as "dyn ". |
| // |
| // Because auto traits lengthen a type name considerably without |
| // providing much value to a search for related source code, it would be |
| // desirable to abbreviate |
| // dyn main::Trait + std::marker::Copy + std::marker::Send |
| // to |
| // dyn main::Trait + ..., |
| // eliding the auto traits. But it is difficult to do so correctly, in |
| // part because there is no guarantee that the mangling will list the |
| // main trait first. So we just print all the traits in their order of |
| // appearance in the mangled name. |
| dyn_trait_type: |
| if (!Emit("dyn ")) return false; |
| if (!ParseOptionalBinder()) return false; |
| if (!Eat('E')) { |
| ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits); |
| while (!Eat('E')) { |
| if (!Emit(" + ")) return false; |
| ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits); |
| } |
| } |
| if (!ParseRequiredLifetime()) return false; |
| continue; |
| |
| // dyn-trait -> path dyn-trait-assoc-binding* |
| // dyn-trait-assoc-binding -> p undisambiguated-identifier type |
| // |
| // We render nonempty binding lists as <>, omitting their contents as |
| // for generic-args. |
| dyn_trait: |
| ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait); |
| if (Peek() == 'p') { |
| if (!Emit("<>")) return false; |
| ++silence_depth_; |
| while (Eat('p')) { |
| if (!ParseUndisambiguatedIdentifier()) return false; |
| ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding); |
| } |
| --silence_depth_; |
| } |
| continue; |
| |
| // const -> type const-data | p | backref |
| // |
| // const is a C++ keyword, so we use the label `constant` instead. |
| constant: |
| if (Eat('B')) goto const_backref; |
| if (Eat('p')) { |
| if (!Emit("_")) return false; |
| continue; |
| } |
| |
| // Scan the type without printing it. |
| // |
| // The Rust language restricts the type of a const generic argument |
| // much more than the mangling grammar does. We do not enforce this. |
| // |
| // We also do not bother printing false, true, 'A', and '\u{abcd}' for |
| // the types bool and char. Because we do not print generic-args |
| // contents, we expect to print constants only in array sizes, and |
| // those should not be bool or char. |
| ++silence_depth_; |
| ABSL_DEMANGLER_RECURSE(type, kConstData); |
| --silence_depth_; |
| |
| // const-data -> n? hex-digit* _ |
| // |
| // Although the grammar doesn't say this, existing demanglers expect |
| // that zero is 0, not an empty digit sequence, and no nonzero value |
| // may have leading zero digits. Also n0_ is accepted and printed as |
| // -0, though a toolchain will probably never write that encoding. |
| if (Eat('n') && !EmitChar('-')) return false; |
| if (!Emit("0x")) return false; |
| if (Eat('0')) { |
| if (!EmitChar('0')) return false; |
| if (!Eat('_')) return false; |
| continue; |
| } |
| while (IsLowerHexDigit(Peek())) { |
| if (!EmitChar(Take())) return false; |
| } |
| if (!Eat('_')) return false; |
| continue; |
| |
| // generic-args -> I path generic-arg* E (I already consumed) |
| // |
| // We follow the C++ demangler in omitting all the arguments from the |
| // output, printing only the list opening and closing tokens. |
| generic_args: |
| ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList); |
| if (!Emit("::<>")) return false; |
| ++silence_depth_; |
| while (!Eat('E')) { |
| ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList); |
| } |
| --silence_depth_; |
| continue; |
| |
| // generic-arg -> lifetime | type | K const |
| generic_arg: |
| if (Peek() == 'L') { |
| if (!ParseOptionalLifetime()) return false; |
| continue; |
| } |
| if (Eat('K')) goto constant; |
| goto type; |
| |
| // backref -> B base-62-number (B already consumed) |
| // |
| // The BeginBackref call parses and range-checks the base-62-number. We |
| // always do that much. |
| // |
| // The recursive call parses and prints what the backref points at. We |
| // save CPU and stack by skipping this work if the output would be |
| // suppressed anyway. |
| path_backref: |
| if (!BeginBackref()) return false; |
| if (silence_depth_ == 0) { |
| ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding); |
| } |
| EndBackref(); |
| continue; |
| |
| // This represents the same backref production as in path_backref but |
| // parses the target as a type instead of a path. |
| type_backref: |
| if (!BeginBackref()) return false; |
| if (silence_depth_ == 0) { |
| ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding); |
| } |
| EndBackref(); |
| continue; |
| |
| const_backref: |
| if (!BeginBackref()) return false; |
| if (silence_depth_ == 0) { |
| ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding); |
| } |
| EndBackref(); |
| continue; |
| } |
| } |
| |
| return false; // hit iteration limit or a bug in our stack handling |
| } |
| |
| private: |
| // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls. |
| enum ReturnAddress : uint8_t { |
| kInstantiatingCrate, |
| kVendorSpecificSuffix, |
| kIdentifierInUppercaseNamespace, |
| kIdentifierInLowercaseNamespace, |
| kInherentImplType, |
| kInherentImplEnding, |
| kTraitImplType, |
| kTraitImplInfix, |
| kTraitImplEnding, |
| kImplPathEnding, |
| kTraitDefinitionInfix, |
| kTraitDefinitionEnding, |
| kArraySize, |
| kFinishArray, |
| kSliceEnding, |
| kAfterFirstTupleElement, |
| kAfterSecondTupleElement, |
| kAfterThirdTupleElement, |
| kAfterSubsequentTupleElement, |
| kContinueParameterList, |
| kFinishFn, |
| kBeginAutoTraits, |
| kContinueAutoTraits, |
| kContinueDynTrait, |
| kContinueAssocBinding, |
| kConstData, |
| kBeginGenericArgList, |
| kContinueGenericArgList, |
| kPathBackrefEnding, |
| kTypeBackrefEnding, |
| kConstantBackrefEnding, |
| }; |
| |
| // Element counts for the stack arrays. Larger stack sizes accommodate more |
| // deeply nested names at the cost of a larger footprint on the C++ call |
| // stack. |
| enum { |
| // Maximum recursive calls outstanding at one time. |
| kStackSize = 256, |
| |
| // Maximum N<uppercase> nested-paths open at once. We do not expect |
| // closures inside closures inside closures as much as functions inside |
| // modules inside other modules, so we can use a smaller array here. |
| kNamespaceStackSize = 64, |
| |
| // Maximum number of nested backrefs. We can keep this stack pretty small |
| // because we do not follow backrefs inside generic-args or other contexts |
| // that suppress printing, so deep stacking is unlikely in practice. |
| kPositionStackSize = 16, |
| }; |
| |
| // Returns the next input character without consuming it. |
| char Peek() const { return encoding_[pos_]; } |
| |
| // Consumes and returns the next input character. |
| char Take() { return encoding_[pos_++]; } |
| |
| // If the next input character is the given character, consumes it and returns |
| // true; otherwise returns false without consuming a character. |
| ABSL_MUST_USE_RESULT bool Eat(char want) { |
| if (encoding_[pos_] != want) return false; |
| ++pos_; |
| return true; |
| } |
| |
| // Provided there is enough remaining output space, appends c to the output, |
| // writing a fresh NUL terminator afterward, and returns true. Returns false |
| // if the output buffer had less than two bytes free. |
| ABSL_MUST_USE_RESULT bool EmitChar(char c) { |
| if (silence_depth_ > 0) return true; |
| if (out_end_ - out_ < 2) return false; |
| *out_++ = c; |
| *out_ = '\0'; |
| return true; |
| } |
| |
| // Provided there is enough remaining output space, appends the C string token |
| // to the output, followed by a NUL character, and returns true. Returns |
| // false if not everything fit into the output buffer. |
| ABSL_MUST_USE_RESULT bool Emit(const char* token) { |
| if (silence_depth_ > 0) return true; |
| const size_t token_length = std::strlen(token); |
| const size_t bytes_to_copy = token_length + 1; // token and final NUL |
| if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false; |
| std::memcpy(out_, token, bytes_to_copy); |
| out_ += token_length; |
| return true; |
| } |
| |
| // Provided there is enough remaining output space, appends the decimal form |
| // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the |
| // output, followed by a NUL character, and returns true. Returns false if |
| // not everything fit into the output buffer. |
| ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) { |
| if (disambiguator < 0) return EmitChar('?'); // parsed but too large |
| if (disambiguator == 0) return EmitChar('0'); |
| // Convert disambiguator to decimal text. Three digits per byte is enough |
| // because 999 > 256. The bound will remain correct even if future |
| // maintenance changes the type of the disambiguator variable. |
| char digits[3 * sizeof(disambiguator)] = {}; |
| size_t leading_digit_index = sizeof(digits) - 1; |
| for (; disambiguator > 0; disambiguator /= 10) { |
| digits[--leading_digit_index] = |
| static_cast<char>('0' + disambiguator % 10); |
| } |
| return Emit(digits + leading_digit_index); |
| } |
| |
| // Consumes an optional disambiguator (s123_) from the input. |
| // |
| // On success returns true and fills value with the encoded value if it was |
| // not too big, otherwise with -1. If the optional disambiguator was omitted, |
| // value is 0. On parse failure returns false and sets value to -1. |
| ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) { |
| value = -1; |
| |
| // disambiguator = s base-62-number |
| // |
| // Disambiguators are optional. An omitted disambiguator is zero. |
| if (!Eat('s')) { |
| value = 0; |
| return true; |
| } |
| int base_62_value = 0; |
| if (!ParseBase62Number(base_62_value)) return false; |
| value = base_62_value < 0 ? -1 : base_62_value + 1; |
| return true; |
| } |
| |
| // Consumes a base-62 number like _ or 123_ from the input. |
| // |
| // On success returns true and fills value with the encoded value if it was |
| // not too big, otherwise with -1. On parse failure returns false and sets |
| // value to -1. |
| ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) { |
| value = -1; |
| |
| // base-62-number = (digit | lower | upper)* _ |
| // |
| // An empty base-62 digit sequence means 0. |
| if (Eat('_')) { |
| value = 0; |
| return true; |
| } |
| |
| // A nonempty digit sequence denotes its base-62 value plus 1. |
| int encoded_number = 0; |
| bool overflowed = false; |
| while (IsAlpha(Peek()) || IsDigit(Peek())) { |
| const char c = Take(); |
| if (encoded_number >= std::numeric_limits<int>::max()/62) { |
| // If we are close to overflowing an int, keep parsing but stop updating |
| // encoded_number and remember to return -1 at the end. The point is to |
| // avoid undefined behavior while parsing crate-root disambiguators, |
| // which are large in practice but not shown in demangling, while |
| // successfully computing closure and shim disambiguators, which are |
| // typically small and are printed out. |
| overflowed = true; |
| } else { |
| int digit; |
| if (IsDigit(c)) { |
| digit = c - '0'; |
| } else if (IsLower(c)) { |
| digit = c - 'a' + 10; |
| } else { |
| digit = c - 'A' + 36; |
| } |
| encoded_number = 62 * encoded_number + digit; |
| } |
| } |
| |
| if (!Eat('_')) return false; |
| if (!overflowed) value = encoded_number + 1; |
| return true; |
| } |
| |
| // Consumes an identifier from the input, returning true on success. |
| // |
| // A nonzero uppercase_namespace specifies the character after the N in a |
| // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to |
| // write out the name with the conventional decoration for that namespace. |
| ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') { |
| // identifier -> disambiguator? undisambiguated-identifier |
| int disambiguator = 0; |
| if (!ParseDisambiguator(disambiguator)) return false; |
| |
| return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator); |
| } |
| |
| // Consumes from the input an identifier with no preceding disambiguator, |
| // returning true on success. |
| // |
| // When ParseIdentifier calls this, it passes the N<namespace> character and |
| // disambiguator value so that "{closure#42}" and similar forms can be |
| // rendered correctly. |
| // |
| // At other appearances of undisambiguated-identifier in the grammar, this |
| // treatment is not applicable, and the call site omits both arguments. |
| ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier( |
| char uppercase_namespace = '\0', int disambiguator = 0) { |
| // undisambiguated-identifier -> u? decimal-number _? bytes |
| const bool is_punycoded = Eat('u'); |
| if (!IsDigit(Peek())) return false; |
| int num_bytes = 0; |
| if (!ParseDecimalNumber(num_bytes)) return false; |
| (void)Eat('_'); // optional separator, needed if a digit follows |
| if (is_punycoded) { |
| DecodeRustPunycodeOptions options; |
| options.punycode_begin = &encoding_[pos_]; |
| options.punycode_end = &encoding_[pos_] + num_bytes; |
| options.out_begin = out_; |
| options.out_end = out_end_; |
| out_ = DecodeRustPunycode(options); |
| if (out_ == nullptr) return false; |
| pos_ += static_cast<size_t>(num_bytes); |
| } |
| |
| // Emit the beginnings of braced forms like {shim:vtable#0}. |
| if (uppercase_namespace != '\0') { |
| switch (uppercase_namespace) { |
| case 'C': |
| if (!Emit("{closure")) return false; |
| break; |
| case 'S': |
| if (!Emit("{shim")) return false; |
| break; |
| default: |
| if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false; |
| break; |
| } |
| if (num_bytes > 0 && !Emit(":")) return false; |
| } |
| |
| // Emit the name itself. |
| if (!is_punycoded) { |
| for (int i = 0; i < num_bytes; ++i) { |
| const char c = Take(); |
| if (!IsIdentifierChar(c) && |
| // The spec gives toolchains the choice of Punycode or raw UTF-8 for |
| // identifiers containing code points above 0x7f, so accept bytes |
| // with the high bit set. |
| (c & 0x80) == 0) { |
| return false; |
| } |
| if (!EmitChar(c)) return false; |
| } |
| } |
| |
| // Emit the endings of braced forms, e.g., "#42}". |
| if (uppercase_namespace != '\0') { |
| if (!EmitChar('#')) return false; |
| if (!EmitDisambiguator(disambiguator)) return false; |
| if (!EmitChar('}')) return false; |
| } |
| |
| return true; |
| } |
| |
| // Consumes a decimal number like 0 or 123 from the input. On success returns |
| // true and fills value with the encoded value. If the encoded value is too |
| // large or otherwise unparsable, returns false and sets value to -1. |
| ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) { |
| value = -1; |
| if (!IsDigit(Peek())) return false; |
| int encoded_number = Take() - '0'; |
| if (encoded_number == 0) { |
| // Decimal numbers are never encoded with extra leading zeroes. |
| value = 0; |
| return true; |
| } |
| while (IsDigit(Peek()) && |
| // avoid overflow |
| encoded_number < std::numeric_limits<int>::max()/10) { |
| encoded_number = 10 * encoded_number + (Take() - '0'); |
| } |
| if (IsDigit(Peek())) return false; // too big |
| value = encoded_number; |
| return true; |
| } |
| |
| // Consumes a binder of higher-ranked lifetimes if one is present. On success |
| // returns true and discards the encoded lifetime count. On parse failure |
| // returns false. |
| ABSL_MUST_USE_RESULT bool ParseOptionalBinder() { |
| // binder -> G base-62-number |
| if (!Eat('G')) return true; |
| int ignored_binding_count; |
| return ParseBase62Number(ignored_binding_count); |
| } |
| |
| // Consumes a lifetime if one is present. |
| // |
| // On success returns true and discards the lifetime index. We do not print |
| // or even range-check lifetimes because they are a finer detail than other |
| // things we omit from output, such as the entire contents of generic-args. |
| // |
| // On parse failure returns false. |
| ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() { |
| // lifetime -> L base-62-number |
| if (!Eat('L')) return true; |
| int ignored_de_bruijn_index; |
| return ParseBase62Number(ignored_de_bruijn_index); |
| } |
| |
| // Consumes a lifetime just like ParseOptionalLifetime, but returns false if |
| // there is no lifetime here. |
| ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() { |
| if (Peek() != 'L') return false; |
| return ParseOptionalLifetime(); |
| } |
| |
| // Pushes ns onto the namespace stack and returns true if the stack is not |
| // full, else returns false. |
| ABSL_MUST_USE_RESULT bool PushNamespace(char ns) { |
| if (namespace_depth_ == kNamespaceStackSize) return false; |
| namespace_stack_[namespace_depth_++] = ns; |
| return true; |
| } |
| |
| // Pops the last pushed namespace. Requires that the namespace stack is not |
| // empty (namespace_depth_ > 0). |
| char PopNamespace() { return namespace_stack_[--namespace_depth_]; } |
| |
| // Pushes position onto the position stack and returns true if the stack is |
| // not full, else returns false. |
| ABSL_MUST_USE_RESULT bool PushPosition(int position) { |
| if (position_depth_ == kPositionStackSize) return false; |
| position_stack_[position_depth_++] = position; |
| return true; |
| } |
| |
| // Pops the last pushed input position. Requires that the position stack is |
| // not empty (position_depth_ > 0). |
| int PopPosition() { return position_stack_[--position_depth_]; } |
| |
| // Consumes a base-62-number denoting a backref target, pushes the current |
| // input position on the data stack, and sets the input position to the |
| // beginning of the backref target. Returns true on success. Returns false |
| // if parsing failed, the stack is exhausted, or the backref target position |
| // is out of range. |
| ABSL_MUST_USE_RESULT bool BeginBackref() { |
| // backref = B base-62-number (B already consumed) |
| // |
| // Reject backrefs that don't parse, overflow int, or don't point backward. |
| // If the offset looks fine, adjust it to account for the _R prefix. |
| int offset = 0; |
| const int offset_of_this_backref = |
| pos_ - 2 /* _R */ - 1 /* B already consumed */; |
| if (!ParseBase62Number(offset) || offset < 0 || |
| offset >= offset_of_this_backref) { |
| return false; |
| } |
| offset += 2; |
| |
| // Save the old position to restore later. |
| if (!PushPosition(pos_)) return false; |
| |
| // Move the input position to the backref target. |
| // |
| // Note that we do not check whether the new position points to the |
| // beginning of a construct matching the context in which the backref |
| // appeared. We just jump to it and see whether nested parsing succeeds. |
| // We therefore accept various wrong manglings, e.g., a type backref |
| // pointing to an 'l' character inside an identifier, which happens to mean |
| // i32 when parsed as a type mangling. This saves the complexity and RAM |
| // footprint of remembering which offsets began which kinds of |
| // substructures. Existing demanglers take similar shortcuts. |
| pos_ = offset; |
| return true; |
| } |
| |
| // Cleans up after a backref production by restoring the previous input |
| // position from the data stack. |
| void EndBackref() { pos_ = PopPosition(); } |
| |
| // The leftmost recursion_depth_ elements of recursion_stack_ contain the |
| // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed. |
| ReturnAddress recursion_stack_[kStackSize] = {}; |
| int recursion_depth_ = 0; |
| |
| // The leftmost namespace_depth_ elements of namespace_stack_ contain the |
| // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a |
| // closure. |
| char namespace_stack_[kNamespaceStackSize] = {}; |
| int namespace_depth_ = 0; |
| |
| // The leftmost position_depth_ elements of position_stack_ contain the input |
| // positions to return to after fully printing the targets of backrefs. |
| int position_stack_[kPositionStackSize] = {}; |
| int position_depth_ = 0; |
| |
| // Anything parsed while silence_depth_ > 0 contributes nothing to the |
| // demangled output. For constructs omitted from the demangling, such as |
| // impl-path and the contents of generic-args, we will increment |
| // silence_depth_ on the way in and decrement silence_depth_ on the way out. |
| int silence_depth_ = 0; |
| |
| // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is |
| // the next input character to be scanned. |
| int pos_ = 0; |
| const char* encoding_ = nullptr; |
| |
| // Output: *out_ is where the next output character should be written, and |
| // out_end_ points past the last byte of available space. |
| char* out_ = nullptr; |
| char* out_end_ = nullptr; |
| }; |
| |
| } // namespace |
| |
| bool DemangleRustSymbolEncoding(const char* mangled, char* out, |
| size_t out_size) { |
| return RustSymbolParser(mangled, out, out + out_size).Parse(); |
| } |
| |
| } // namespace debugging_internal |
| ABSL_NAMESPACE_END |
| } // namespace absl |