| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file or at |
| // https://developers.google.com/open-source/licenses/bsd |
| |
| // Author: kenton@google.com (Kenton Varda) |
| // Based on original Protocol Buffers design by |
| // Sanjay Ghemawat, Jeff Dean, and others. |
| |
| #include "google/protobuf/compiler/csharp/csharp_helpers.h" |
| |
| #include <algorithm> |
| #include <limits> |
| #include <sstream> |
| #include <string> |
| #include <vector> |
| |
| #include "absl/container/flat_hash_set.h" |
| #include "absl/log/absl_log.h" |
| #include "absl/strings/ascii.h" |
| #include "absl/strings/str_replace.h" |
| #include "absl/strings/string_view.h" |
| #include "google/protobuf/compiler/csharp/csharp_enum_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_field_base.h" |
| #include "google/protobuf/compiler/csharp/csharp_generator.h" |
| #include "google/protobuf/compiler/csharp/csharp_map_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_message_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_options.h" |
| #include "google/protobuf/compiler/csharp/csharp_primitive_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_repeated_enum_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_repeated_message_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h" |
| #include "google/protobuf/compiler/csharp/csharp_wrapper_field.h" |
| #include "google/protobuf/compiler/csharp/names.h" |
| #include "google/protobuf/compiler/retention.h" |
| #include "google/protobuf/descriptor.h" |
| #include "google/protobuf/descriptor.pb.h" |
| |
| // Must be last. |
| #include "google/protobuf/port_def.inc" |
| |
| namespace google { |
| namespace protobuf { |
| namespace compiler { |
| namespace csharp { |
| |
| CSharpType GetCSharpType(FieldDescriptor::Type type) { |
| switch (type) { |
| case FieldDescriptor::TYPE_INT32: |
| return CSHARPTYPE_INT32; |
| case FieldDescriptor::TYPE_INT64: |
| return CSHARPTYPE_INT64; |
| case FieldDescriptor::TYPE_UINT32: |
| return CSHARPTYPE_UINT32; |
| case FieldDescriptor::TYPE_UINT64: |
| return CSHARPTYPE_UINT32; |
| case FieldDescriptor::TYPE_SINT32: |
| return CSHARPTYPE_INT32; |
| case FieldDescriptor::TYPE_SINT64: |
| return CSHARPTYPE_INT64; |
| case FieldDescriptor::TYPE_FIXED32: |
| return CSHARPTYPE_UINT32; |
| case FieldDescriptor::TYPE_FIXED64: |
| return CSHARPTYPE_UINT64; |
| case FieldDescriptor::TYPE_SFIXED32: |
| return CSHARPTYPE_INT32; |
| case FieldDescriptor::TYPE_SFIXED64: |
| return CSHARPTYPE_INT64; |
| case FieldDescriptor::TYPE_FLOAT: |
| return CSHARPTYPE_FLOAT; |
| case FieldDescriptor::TYPE_DOUBLE: |
| return CSHARPTYPE_DOUBLE; |
| case FieldDescriptor::TYPE_BOOL: |
| return CSHARPTYPE_BOOL; |
| case FieldDescriptor::TYPE_ENUM: |
| return CSHARPTYPE_ENUM; |
| case FieldDescriptor::TYPE_STRING: |
| return CSHARPTYPE_STRING; |
| case FieldDescriptor::TYPE_BYTES: |
| return CSHARPTYPE_BYTESTRING; |
| case FieldDescriptor::TYPE_GROUP: |
| return CSHARPTYPE_MESSAGE; |
| case FieldDescriptor::TYPE_MESSAGE: |
| return CSHARPTYPE_MESSAGE; |
| |
| // No default because we want the compiler to complain if any new |
| // types are added. |
| } |
| ABSL_LOG(FATAL) << "Can't get here."; |
| return (CSharpType)-1; |
| } |
| |
| // Convert a string which is expected to be SHOUTY_CASE (but may not be |
| // *precisely* shouty) into a PascalCase string. Precise rules implemented: |
| |
| // Previous input character Current character Case |
| // Any Non-alphanumeric Skipped |
| // None - first char of input Alphanumeric Upper |
| // Non-letter (e.g. _ or 1) Alphanumeric Upper |
| // Numeric Alphanumeric Upper |
| // Lower letter Alphanumeric Same as current |
| // Upper letter Alphanumeric Lower |
| std::string ShoutyToPascalCase(absl::string_view input) { |
| std::string result; |
| // Simple way of implementing "always start with upper" |
| char previous = '_'; |
| for (int i = 0; i < input.size(); i++) { |
| char current = input[i]; |
| if (!absl::ascii_isalnum(current)) { |
| previous = current; |
| continue; |
| } |
| if (!absl::ascii_isalnum(previous)) { |
| result += absl::ascii_toupper(current); |
| } else if (absl::ascii_isdigit(previous)) { |
| result += absl::ascii_toupper(current); |
| } else if (absl::ascii_islower(previous)) { |
| result += current; |
| } else { |
| result += absl::ascii_tolower(current); |
| } |
| previous = current; |
| } |
| return result; |
| } |
| |
| // Attempt to remove a prefix from a value, ignoring casing and skipping |
| // underscores. (foo, foo_bar) => bar - underscore after prefix is skipped (FOO, |
| // foo_bar) => bar - casing is ignored (foo_bar, foobarbaz) => baz - underscore |
| // in prefix is ignored (foobar, foo_barbaz) => baz - underscore in value is |
| // ignored (foo, bar) => bar - prefix isn't matched; return original value |
| std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) { |
| // First normalize to a lower-case no-underscores prefix to match against |
| std::string prefix_to_match = ""; |
| for (size_t i = 0; i < prefix.size(); i++) { |
| if (prefix[i] != '_') { |
| prefix_to_match += absl::ascii_tolower(prefix[i]); |
| } |
| } |
| |
| // This keeps track of how much of value we've consumed |
| size_t prefix_index, value_index; |
| for (prefix_index = 0, value_index = 0; |
| prefix_index < prefix_to_match.size() && value_index < value.size(); |
| value_index++) { |
| // Skip over underscores in the value |
| if (value[value_index] == '_') { |
| continue; |
| } |
| if (absl::ascii_tolower(value[value_index]) != |
| prefix_to_match[prefix_index++]) { |
| // Failed to match the prefix - bail out early. |
| return std::string(value); |
| } |
| } |
| |
| // If we didn't finish looking through the prefix, we can't strip it. |
| if (prefix_index < prefix_to_match.size()) { |
| return std::string(value); |
| } |
| |
| // Step over any underscores after the prefix |
| while (value_index < value.size() && value[value_index] == '_') { |
| value_index++; |
| } |
| |
| // If there's nothing left (e.g. it was a prefix with only underscores |
| // afterwards), don't strip. |
| if (value_index == value.size()) { |
| return std::string(value); |
| } |
| |
| return std::string(value.substr(value_index)); |
| } |
| |
| // Format the enum value name in a pleasant way for C#: |
| // - Strip the enum name as a prefix if possible |
| // - Convert to PascalCase. |
| // For example, an enum called Color with a value of COLOR_BLUE should |
| // result in an enum value in C# called just Blue |
| std::string GetEnumValueName(absl::string_view enum_name, |
| absl::string_view enum_value_name) { |
| std::string stripped = TryRemovePrefix(enum_name, enum_value_name); |
| std::string result = ShoutyToPascalCase(stripped); |
| // Just in case we have an enum name of FOO and a value of FOO_2... make sure |
| // the returned string is a valid identifier. |
| if (absl::ascii_isdigit(result[0])) { |
| return absl::StrCat("_", result); |
| } |
| return result; |
| } |
| |
| std::string GetFullExtensionName(const FieldDescriptor* descriptor) { |
| if (descriptor->extension_scope()) { |
| return absl::StrCat(GetClassName(descriptor->extension_scope()), |
| ".Extensions.", GetPropertyName(descriptor)); |
| } |
| |
| return absl::StrCat(GetExtensionClassUnqualifiedName(descriptor->file()), ".", |
| GetPropertyName(descriptor)); |
| } |
| |
| // Groups in proto2 are hacky: The name of the field is just the lower-cased |
| // name of the group type. In C#, though, we would like to retain the original |
| // capitalization of the type name. Fields with an encoding of "delimited" in |
| // editions are like groups, but have a real name, so we use that. |
| std::string GetFieldName(const FieldDescriptor* descriptor) { |
| if (internal::cpp::IsGroupLike(*descriptor)) { |
| return descriptor->message_type()->name(); |
| } else { |
| return descriptor->name(); |
| } |
| } |
| |
| std::string GetFieldConstantName(const FieldDescriptor* field) { |
| return absl::StrCat(GetPropertyName(field), "FieldNumber"); |
| } |
| |
| std::string GetPropertyName(const FieldDescriptor* descriptor) { |
| // Names of members declared or overridden in the message. |
| static const auto& reserved_member_names = |
| *new absl::flat_hash_set<absl::string_view>( |
| {"Types", "Descriptor", "Equals", "ToString", "GetHashCode", |
| "WriteTo", "Clone", "CalculateSize", "MergeFrom", "OnConstruction", |
| "Parser"}); |
| |
| // TODO: consider introducing csharp_property_name field option |
| std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor)); |
| // Avoid either our own type name or reserved names. |
| // There are various ways of ending up with naming collisions, but we try to |
| // avoid obvious ones. In particular, we avoid the names of all the members we |
| // generate. Note that we *don't* add an underscore for MemberwiseClone or |
| // GetType. Those generate warnings, but not errors; changing the name now |
| // could be a breaking change. |
| if (property_name == descriptor->containing_type()->name() || |
| reserved_member_names.find(property_name) != |
| reserved_member_names.end()) { |
| absl::StrAppend(&property_name, "_"); |
| } |
| return property_name; |
| } |
| |
| std::string GetOneofCaseName(const FieldDescriptor* descriptor) { |
| // The name in a oneof case enum is the same as for the property, but as we |
| // always have a "None" value as well, we need to reserve that by appending an |
| // underscore. |
| std::string property_name = GetPropertyName(descriptor); |
| return property_name == "None" ? "None_" : property_name; |
| } |
| |
| // TODO: c&p from Java protoc plugin |
| // For encodings with fixed sizes, returns that size in bytes. Otherwise |
| // returns -1. |
| int GetFixedSize(FieldDescriptor::Type type) { |
| switch (type) { |
| case FieldDescriptor::TYPE_INT32: |
| return -1; |
| case FieldDescriptor::TYPE_INT64: |
| return -1; |
| case FieldDescriptor::TYPE_UINT32: |
| return -1; |
| case FieldDescriptor::TYPE_UINT64: |
| return -1; |
| case FieldDescriptor::TYPE_SINT32: |
| return -1; |
| case FieldDescriptor::TYPE_SINT64: |
| return -1; |
| case FieldDescriptor::TYPE_FIXED32: |
| return internal::WireFormatLite::kFixed32Size; |
| case FieldDescriptor::TYPE_FIXED64: |
| return internal::WireFormatLite::kFixed64Size; |
| case FieldDescriptor::TYPE_SFIXED32: |
| return internal::WireFormatLite::kSFixed32Size; |
| case FieldDescriptor::TYPE_SFIXED64: |
| return internal::WireFormatLite::kSFixed64Size; |
| case FieldDescriptor::TYPE_FLOAT: |
| return internal::WireFormatLite::kFloatSize; |
| case FieldDescriptor::TYPE_DOUBLE: |
| return internal::WireFormatLite::kDoubleSize; |
| |
| case FieldDescriptor::TYPE_BOOL: |
| return internal::WireFormatLite::kBoolSize; |
| case FieldDescriptor::TYPE_ENUM: |
| return -1; |
| |
| case FieldDescriptor::TYPE_STRING: |
| return -1; |
| case FieldDescriptor::TYPE_BYTES: |
| return -1; |
| case FieldDescriptor::TYPE_GROUP: |
| return -1; |
| case FieldDescriptor::TYPE_MESSAGE: |
| return -1; |
| |
| // No default because we want the compiler to complain if any new |
| // types are added. |
| } |
| ABSL_LOG(FATAL) << "Can't get here."; |
| return -1; |
| } |
| |
| static const char base64_chars[] = |
| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
| |
| std::string StringToBase64(absl::string_view input) { |
| std::string result; |
| size_t remaining = input.size(); |
| const unsigned char* src = (const unsigned char*)input.data(); |
| while (remaining > 2) { |
| result += base64_chars[src[0] >> 2]; |
| result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)]; |
| result += base64_chars[((src[1] & 0xf) << 2) | (src[2] >> 6)]; |
| result += base64_chars[src[2] & 0x3f]; |
| remaining -= 3; |
| src += 3; |
| } |
| switch (remaining) { |
| case 2: |
| result += base64_chars[src[0] >> 2]; |
| result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)]; |
| result += base64_chars[(src[1] & 0xf) << 2]; |
| result += '='; |
| src += 2; |
| break; |
| case 1: |
| result += base64_chars[src[0] >> 2]; |
| result += base64_chars[((src[0] & 0x3) << 4)]; |
| result += '='; |
| result += '='; |
| src += 1; |
| break; |
| } |
| return result; |
| } |
| |
| std::string FileDescriptorToBase64(const FileDescriptor* descriptor) { |
| std::string fdp_bytes; |
| FileDescriptorProto fdp = StripSourceRetentionOptions(*descriptor); |
| fdp.SerializeToString(&fdp_bytes); |
| return StringToBase64(fdp_bytes); |
| } |
| |
| FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor, |
| int presenceIndex, |
| const Options* options) { |
| switch (descriptor->type()) { |
| case FieldDescriptor::TYPE_GROUP: |
| case FieldDescriptor::TYPE_MESSAGE: |
| if (descriptor->is_repeated()) { |
| if (descriptor->is_map()) { |
| return new MapFieldGenerator(descriptor, presenceIndex, options); |
| } else { |
| return new RepeatedMessageFieldGenerator(descriptor, presenceIndex, |
| options); |
| } |
| } else { |
| if (IsWrapperType(descriptor)) { |
| if (descriptor->real_containing_oneof()) { |
| return new WrapperOneofFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| return new WrapperFieldGenerator(descriptor, presenceIndex, |
| options); |
| } |
| } else { |
| if (descriptor->real_containing_oneof()) { |
| return new MessageOneofFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| return new MessageFieldGenerator(descriptor, presenceIndex, |
| options); |
| } |
| } |
| } |
| case FieldDescriptor::TYPE_ENUM: |
| if (descriptor->is_repeated()) { |
| return new RepeatedEnumFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| if (descriptor->real_containing_oneof()) { |
| return new EnumOneofFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| return new EnumFieldGenerator(descriptor, presenceIndex, options); |
| } |
| } |
| default: |
| if (descriptor->is_repeated()) { |
| return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| if (descriptor->real_containing_oneof()) { |
| return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex, |
| options); |
| } else { |
| return new PrimitiveFieldGenerator(descriptor, presenceIndex, |
| options); |
| } |
| } |
| } |
| } |
| |
| bool IsNullable(const FieldDescriptor* descriptor) { |
| if (descriptor->is_repeated()) { |
| return true; |
| } |
| |
| switch (descriptor->type()) { |
| case FieldDescriptor::TYPE_ENUM: |
| case FieldDescriptor::TYPE_DOUBLE: |
| case FieldDescriptor::TYPE_FLOAT: |
| case FieldDescriptor::TYPE_INT64: |
| case FieldDescriptor::TYPE_UINT64: |
| case FieldDescriptor::TYPE_INT32: |
| case FieldDescriptor::TYPE_FIXED64: |
| case FieldDescriptor::TYPE_FIXED32: |
| case FieldDescriptor::TYPE_BOOL: |
| case FieldDescriptor::TYPE_UINT32: |
| case FieldDescriptor::TYPE_SFIXED32: |
| case FieldDescriptor::TYPE_SFIXED64: |
| case FieldDescriptor::TYPE_SINT32: |
| case FieldDescriptor::TYPE_SINT64: |
| return false; |
| |
| case FieldDescriptor::TYPE_MESSAGE: |
| case FieldDescriptor::TYPE_GROUP: |
| case FieldDescriptor::TYPE_STRING: |
| case FieldDescriptor::TYPE_BYTES: |
| return true; |
| |
| default: |
| ABSL_LOG(FATAL) << "Unknown field type."; |
| return true; |
| } |
| } |
| |
| } // namespace csharp |
| } // namespace compiler |
| } // namespace protobuf |
| } // namespace google |
| |
| #include "google/protobuf/port_undef.inc" |