blob: 0bb4d0336ed8a72b7d177ba738f4c82455cd1e56 [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "google/protobuf/compiler/retention.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/strings/match.h"
#include "absl/types/span.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/dynamic_message.h"
namespace google {
namespace protobuf {
namespace compiler {
namespace {
bool IsOptionsProto(const Message& m) {
const Descriptor* descriptor = m.GetDescriptor();
return descriptor->file()->name() ==
DescriptorProto::descriptor()->file()->name() &&
absl::EndsWith(descriptor->name(), "Options");
}
// Returns true if `m` reports a serialized size of zero bytes.
bool IsEmpty(const Message& m) {
  const size_t serialized_size = m.ByteSizeLong();
  return serialized_size == 0;
}
// Walks every populated field of `m` and clears the ones whose field options
// declare source retention, descending into message-typed fields that are
// kept.
//
// `path` is a scratch stack holding the SourceCodeInfo.Location-style path of
// the field currently being visited; it is restored to its incoming contents
// before this function returns. If `stripped_paths` is not null, the path of
// every field that gets cleared is appended to it.
void StripMessage(Message& m, std::vector<int>& path,
                  std::vector<std::vector<int>>* stripped_paths) {
  const Reflection* refl = m.GetReflection();

  // Clears `field` on `m` and, when the caller asked for it, records the path
  // that is currently on the stack.
  const auto drop_field = [&](const FieldDescriptor* field) {
    refl->ClearField(&m, field);
    if (stripped_paths != nullptr) {
      stripped_paths->push_back(path);
    }
  };

  std::vector<const FieldDescriptor*> set_fields;
  refl->ListFields(m, &set_fields);
  for (const FieldDescriptor* field : set_fields) {
    path.push_back(field->number());
    const bool source_only =
        field->options().retention() == FieldOptions::RETENTION_SOURCE;
    if (source_only) {
      drop_field(field);
    } else if (field->type() != FieldDescriptor::TYPE_MESSAGE) {
      // Retained non-message field: there is nothing underneath to strip.
    } else if (!field->is_repeated()) {
      Message& child = *refl->MutableMessage(&m, field);
      const bool had_options_content =
          IsOptionsProto(child) && !IsEmpty(child);
      StripMessage(child, path, stripped_paths);
      // An options message that held data before stripping and holds none
      // afterwards is removed entirely.
      if (had_options_content && IsEmpty(child)) {
        drop_field(field);
      }
    } else {
      // Repeated message field: each element gets its index appended to the
      // path while it is being processed.
      const int element_count = refl->FieldSize(m, field);
      for (int index = 0; index < element_count; ++index) {
        path.push_back(index);
        StripMessage(*refl->MutableRepeatedMessage(&m, field, index), path,
                     stripped_paths);
        path.pop_back();
      }
    }
    path.pop_back();
  }
}
// Converts the descriptor to a dynamic message if necessary, and then strips
// out all source-retention options.
//
// The options message may have custom options set on it, and these would
// ordinarily appear as unknown fields since they are not linked into protoc.
// Using a dynamic message allows us to see these custom options. To convert
// back and forth between the generated type and the dynamic message, we have
// to serialize one and parse that into the other.
//
// If any step of that round trip fails, an error is logged (rate-limited) and
// `m` is stripped directly instead. In that situation custom options will
// generally not be stripped, but built-in options still are.
//
// If stripped_paths is not null, it will be populated with the paths that were
// stripped, using the path format from SourceCodeInfo.Location.
void ConvertToDynamicMessageAndStripOptions(
    Message& m, const DescriptorPool& pool,
    std::vector<std::vector<int>>* stripped_paths = nullptr) {
  // Scratch stack used by StripMessage; it is empty again after every call.
  std::vector<int> path;

  // We need to look up the descriptor in the pool so that we can get a
  // descriptor which knows about any custom options that were used in the
  // .proto file.
  const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName());
  if (descriptor == nullptr || &pool == DescriptorPool::generated_pool()) {
    // If the pool does not contain the descriptor, then this proto file does
    // not transitively depend on descriptor.proto, in which case we know there
    // are no custom options to worry about. If we are working with the
    // generated pool, then we can still access any custom options without
    // having to resort to DynamicMessage.
    StripMessage(m, path, stripped_paths);
    return;
  }

  // Single fallback used by every failure below: report the problem and strip
  // the original message without going through DynamicMessage.
  const auto strip_without_dynamic_message = [&]() {
    ABSL_LOG_EVERY_N_SEC(ERROR, 1)
        << "Failed to fully strip source-retention options";
    StripMessage(m, path, stripped_paths);
  };

  // To convert to a dynamic message, we need to serialize the original
  // descriptor and parse it back again. This can fail if the descriptor is
  // invalid, so in that case we handle it gracefully via the fallback above.
  DynamicMessageFactory factory;
  std::unique_ptr<Message> dynamic_message(
      factory.GetPrototype(descriptor)->New());
  std::string serialized;
  if (!m.SerializePartialToString(&serialized) ||
      !dynamic_message->ParsePartialFromString(serialized)) {
    strip_without_dynamic_message();
    return;
  }

  StripMessage(*dynamic_message, path, stripped_paths);

  // Copy the stripped contents back into the caller's message.
  // NOTE(review): if this step fails, the paths already recorded by the
  // dynamic pass above stay in stripped_paths and the fallback appends its
  // own on top of them.
  if (!dynamic_message->SerializePartialToString(&serialized) ||
      !m.ParsePartialFromString(serialized)) {
    strip_without_dynamic_message();
    return;
  }
}
// Returns the descriptor pool associated with `descriptor`, reached through
// the file that the descriptor reports via file().
template <typename DescriptorType>
const google::protobuf::DescriptorPool& GetPool(const DescriptorType& descriptor) {
  const auto* containing_file = descriptor.file();
  return *containing_file->pool();
}
// Overload for FileDescriptor, which exposes its pool directly instead of
// through a file() accessor.
const google::protobuf::DescriptorPool& GetPool(const FileDescriptor& descriptor) {
  const auto* pool = descriptor.pool();
  return *pool;
}
// Returns a copy of `descriptor`'s options message from which all
// source-retention options have been stripped.
template <typename DescriptorType>
auto StripLocalOptions(const DescriptorType& descriptor) {
  // `auto` (not `auto&`) makes a copy, so the descriptor's own options are
  // never modified.
  auto stripped = descriptor.options();
  const auto& pool = GetPool(descriptor);
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns true if the elements of x are exactly the leading elements of y.
// An empty x is a prefix of every y; an x longer than y is never a prefix.
bool IsPrefix(absl::Span<const int> x, absl::Span<const int> y) {
  if (x.size() > y.size()) {
    return false;
  }
  return std::equal(x.begin(), x.end(), y.begin());
}
// Strips the paths in stripped_paths from the SourceCodeInfo.
void StripSourceCodeInfo(std::vector<std::vector<int>>& stripped_paths,
SourceCodeInfo& source_code_info) {
RepeatedPtrField<SourceCodeInfo::Location>* locations =
source_code_info.mutable_location();
// We sort the locations lexicographically by their paths and include an
// index pointing back to the original location.
std::vector<std::pair<absl::Span<const int>, int>> sorted_locations;
sorted_locations.reserve(locations->size());
for (int i = 0; i < locations->size(); ++i) {
sorted_locations.emplace_back((*locations)[i].path(), i);
}
absl::c_sort(sorted_locations);
absl::c_sort(stripped_paths);
// With both arrays sorted, we can efficiently step through them in tandem.
// If a stripped path is a prefix of any location, then that is a location
// we need to delete from the SourceCodeInfo.
absl::flat_hash_set<int> indices_to_delete;
auto i = stripped_paths.cbegin();
auto j = sorted_locations.cbegin();
while (i != stripped_paths.cend() && j != sorted_locations.cend()) {
if (IsPrefix(*i, j->first)) {
indices_to_delete.insert(j->second);
++j;
} else if (*i < j->first) {
++i;
} else {
++j;
}
}
// We delete the locations in descending order to avoid invalidating
// indices.
std::vector<SourceCodeInfo::Location*> old_locations;
old_locations.resize(locations->size());
locations->ExtractSubrange(0, locations->size(), old_locations.data());
locations->Reserve(old_locations.size() - indices_to_delete.size());
for (size_t i = 0; i < old_locations.size(); ++i) {
if (indices_to_delete.contains(i)) {
delete old_locations[i];
} else {
locations->AddAllocated(old_locations[i]);
}
}
}
} // namespace
// Returns `file` converted to a FileDescriptorProto with source-retention
// options stripped. SourceCodeInfo is copied into the result only when
// include_source_code_info is true.
FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file,
                                                bool include_source_code_info) {
  FileDescriptorProto stripped;
  file.CopyTo(&stripped);
  if (include_source_code_info) {
    file.CopySourceCodeInfoTo(&stripped);
  }
  const auto& pool = *file.pool();
  StripSourceRetentionOptions(pool, stripped);
  return stripped;
}
// Strips source-retention options from file_proto in place, using `pool` to
// resolve its descriptor. If file_proto carries SourceCodeInfo, the locations
// belonging to the stripped paths are removed as well.
void StripSourceRetentionOptions(const DescriptorPool& pool,
                                 FileDescriptorProto& file_proto) {
  std::vector<std::vector<int>> removed_paths;
  ConvertToDynamicMessageAndStripOptions(file_proto, pool, &removed_paths);
  if (!file_proto.has_source_code_info()) {
    return;
  }
  auto& source_info = *file_proto.mutable_source_code_info();
  StripSourceCodeInfo(removed_paths, source_info);
}
// Returns `message` converted to a DescriptorProto with source-retention
// options stripped.
DescriptorProto StripSourceRetentionOptions(const Descriptor& message) {
  DescriptorProto stripped;
  message.CopyTo(&stripped);
  const auto& pool = *message.file()->pool();
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns `range` converted to a DescriptorProto::ExtensionRange with
// source-retention options stripped. The pool is taken from the file of
// `message`.
DescriptorProto::ExtensionRange StripSourceRetentionOptions(
    const Descriptor& message, const Descriptor::ExtensionRange& range) {
  DescriptorProto::ExtensionRange stripped;
  range.CopyTo(&stripped);
  const auto& pool = *message.file()->pool();
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns `enm` converted to an EnumDescriptorProto with source-retention
// options stripped.
EnumDescriptorProto StripSourceRetentionOptions(const EnumDescriptor& enm) {
  EnumDescriptorProto stripped;
  enm.CopyTo(&stripped);
  const auto& pool = *enm.file()->pool();
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns `field` converted to a FieldDescriptorProto with source-retention
// options stripped.
FieldDescriptorProto StripSourceRetentionOptions(const FieldDescriptor& field) {
  FieldDescriptorProto stripped;
  field.CopyTo(&stripped);
  const auto& pool = *field.file()->pool();
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns `oneof` converted to a OneofDescriptorProto with source-retention
// options stripped.
OneofDescriptorProto StripSourceRetentionOptions(const OneofDescriptor& oneof) {
  OneofDescriptorProto stripped;
  oneof.CopyTo(&stripped);
  const auto& pool = *oneof.file()->pool();
  ConvertToDynamicMessageAndStripOptions(stripped, pool);
  return stripped;
}
// Returns a copy of the enum's own options with source-retention options
// stripped.
EnumOptions StripLocalSourceRetentionOptions(const EnumDescriptor& descriptor) {
  EnumOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the enum value's own options with source-retention
// options stripped.
EnumValueOptions StripLocalSourceRetentionOptions(
    const EnumValueDescriptor& descriptor) {
  EnumValueOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the field's own options with source-retention options
// stripped.
FieldOptions StripLocalSourceRetentionOptions(
    const FieldDescriptor& descriptor) {
  FieldOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the file's own options with source-retention options
// stripped.
FileOptions StripLocalSourceRetentionOptions(const FileDescriptor& descriptor) {
  FileOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the message's own options with source-retention options
// stripped.
MessageOptions StripLocalSourceRetentionOptions(const Descriptor& descriptor) {
  MessageOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the extension range's own options with source-retention
// options stripped. The pool is looked up through `descriptor`, the message
// passed in alongside the range.
ExtensionRangeOptions StripLocalSourceRetentionOptions(
    const Descriptor& descriptor, const Descriptor::ExtensionRange& range) {
  ExtensionRangeOptions retained = range.options();
  const auto& pool = GetPool(descriptor);
  ConvertToDynamicMessageAndStripOptions(retained, pool);
  return retained;
}
// Returns a copy of the method's own options with source-retention options
// stripped.
MethodOptions StripLocalSourceRetentionOptions(
    const MethodDescriptor& descriptor) {
  MethodOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the oneof's own options with source-retention options
// stripped.
OneofOptions StripLocalSourceRetentionOptions(
    const OneofDescriptor& descriptor) {
  OneofOptions retained = StripLocalOptions(descriptor);
  return retained;
}
// Returns a copy of the service's own options with source-retention options
// stripped.
ServiceOptions StripLocalSourceRetentionOptions(
    const ServiceDescriptor& descriptor) {
  ServiceOptions retained = StripLocalOptions(descriptor);
  return retained;
}
} // namespace compiler
} // namespace protobuf
} // namespace google