Add "absl::StatusOr<PythonConstMessagePointer> GetConstMessagePointer(PyObject* msg)" in proto_api (#19398)

* Fix GetMutableMessagePointer() in the Python proto_api to look up the generated factory first, before falling back to the dynamic message factory

PiperOrigin-RevId: 693428774

* Add "absl::StatusOr<PythonConstMessagePointer> GetConstMessagePointer(PyObject* msg)" to proto_api; it works with the cpp extension, upb, and pure Python.

Cherry-pick of the source files from
https://github.com/protocolbuffers/protobuf/commit/b9e69e8c11ecb87249b0cbf772911e2c3a80971b
PiperOrigin-RevId: 699316527
diff --git a/python/google/protobuf/proto_api.cc b/python/google/protobuf/proto_api.cc
index 50277a3..299c03a 100644
--- a/python/google/protobuf/proto_api.cc
+++ b/python/google/protobuf/proto_api.cc
@@ -1,8 +1,12 @@
 #include "google/protobuf/proto_api.h"
 
+#include <Python.h>
+
+#include <memory>
 #include <string>
 
 #include "absl/log/absl_check.h"
+#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
 #include "google/protobuf/message.h"
 namespace google {
 namespace protobuf {
@@ -52,6 +56,108 @@
   return PythonMessageMutator(owned_msg, msg, py_msg);
 }
 
+// Wraps `message` for read-only access on behalf of the Python object
+// `py_msg`. Takes ownership of `owned_msg` and holds a new reference to
+// `py_msg` until destruction.
+PythonConstMessagePointer::PythonConstMessagePointer(Message* owned_msg,
+                                                     const Message* message,
+                                                     PyObject* py_msg)
+    : owned_msg_(owned_msg), message_(message), py_msg_(py_msg) {
+  ABSL_DCHECK(py_msg != nullptr);
+  ABSL_DCHECK(message != nullptr);
+  Py_INCREF(py_msg_);
+}
+
+// Move constructor: takes over the owned message, the message pointer and
+// the Python reference, and nulls them in `other` so that its destructor
+// returns early without touching the reference count.
+PythonConstMessagePointer::PythonConstMessagePointer(
+    PythonConstMessagePointer&& other)
+    : owned_msg_(other.owned_msg_.release()),
+      message_(other.message_),
+      py_msg_(other.py_msg_) {
+  other.message_ = nullptr;
+  other.py_msg_ = nullptr;
+}
+
+// Returns true if the Python message still has the same content as the C++
+// snapshot in `owned_msg_`. Returns false if there is no snapshot. Otherwise
+// returns false with a Python ValueError set: when the Python message cannot
+// be serialized or parsed back, or when its content differs.
+bool PythonConstMessagePointer::NotChanged() {
+  ABSL_DCHECK(!PyErr_Occurred());
+  if (owned_msg_ == nullptr) {
+    return false;
+  }
+
+  PyObject* py_serialized_pb(
+      PyObject_CallMethod(py_msg_, "SerializeToString", nullptr));
+  if (py_serialized_pb == nullptr) {
+    PyErr_Format(PyExc_ValueError, "Fail to serialize py_msg");
+    return false;
+  }
+  char* data;
+  Py_ssize_t len;
+  if (PyBytes_AsStringAndSize(py_serialized_pb, &data, &len) < 0) {
+    Py_DECREF(py_serialized_pb);
+    PyErr_Format(PyExc_ValueError, "Fail to get bytes from serialized data");
+    return false;
+  }
+
+  // The same message may serialize to different bytes in different languages,
+  // so parse the Python bytes into a C++ message and compare deterministic
+  // C++ serializations instead of comparing the Python bytes directly.
+  std::unique_ptr<google::protobuf::Message> parsed_msg(owned_msg_->New());
+  const bool parsed = parsed_msg->ParseFromArray(data, static_cast<int>(len));
+  // `data` points into the bytes object, which is no longer needed.
+  Py_DECREF(py_serialized_pb);
+  if (!parsed) {
+    PyErr_Format(PyExc_ValueError, "Fail to parse serialized py_msg");
+    return false;
+  }
+
+  // The streams are scoped so that the CodedOutputStream is destroyed, and
+  // `wire` therefore holds exactly the written bytes, before it is returned.
+  const auto serialize = [](const google::protobuf::Message& msg) {
+    std::string wire;
+    {
+      google::protobuf::io::StringOutputStream stream(&wire);
+      google::protobuf::io::CodedOutputStream output(&stream);
+      output.SetSerializationDeterministic(true);
+      msg.SerializeToCodedStream(&output);
+    }
+    return wire;
+  };
+
+  if (serialize(*owned_msg_) == serialize(*parsed_msg)) {
+    return true;
+  }
+  PyErr_Format(PyExc_ValueError, "pymessage has been changed");
+  return false;
+}
+
+// Drops the Python reference taken by the constructor. A moved-from object
+// (py_msg_ == nullptr) holds nothing and returns immediately. When
+// ABSL_DCHECK is enabled, also checks that the Python message was not
+// modified during the lifetime of this object.
+PythonConstMessagePointer::~PythonConstMessagePointer() {
+  if (py_msg_ == nullptr) {
+    ABSL_DCHECK(message_ == nullptr);
+    ABSL_DCHECK(owned_msg_ == nullptr);
+    return;
+  }
+  ABSL_DCHECK(owned_msg_ != nullptr);
+  ABSL_DCHECK(NotChanged());
+  Py_DECREF(py_msg_);
+}
+
+// Lets PyProto_API implementations construct a PythonConstMessagePointer,
+// whose constructor is private.
+PythonConstMessagePointer PyProto_API::CreatePythonConstMessagePointer(
+    Message* owned_msg, const Message* msg, PyObject* py_msg) const {
+  return PythonConstMessagePointer(owned_msg, msg, py_msg);
+}
+
 }  // namespace python
 }  // namespace protobuf
 }  // namespace google
diff --git a/python/google/protobuf/proto_api.h b/python/google/protobuf/proto_api.h
index 5e2957e..1a906d2 100644
--- a/python/google/protobuf/proto_api.h
+++ b/python/google/protobuf/proto_api.h
@@ -40,7 +40,7 @@
 //        PyProtoAPICapsuleName(), 0));
 //    if (!py_proto_api) { ...handle ImportError... }
 // Then use the methods of the returned class:
-//    py_proto_api->GetMessagePointer(...);
+//    py_proto_api->GetConstMessagePointer(...);
 
 #ifndef GOOGLE_PROTOBUF_PYTHON_PROTO_API_H__
 #define GOOGLE_PROTOBUF_PYTHON_PROTO_API_H__
@@ -54,11 +54,14 @@
 #include "google/protobuf/descriptor_database.h"
 #include "google/protobuf/message.h"
 
+PyObject* pymessage_mutate_const(PyObject* self, PyObject* args);
+
 namespace google {
 namespace protobuf {
 namespace python {
 
 class PythonMessageMutator;
+class PythonConstMessagePointer;
 
 // Note on the implementation:
 // This API is designed after
@@ -78,16 +81,28 @@
   // Side-effect: The message will definitely be cleared. *When* the message
   // gets cleared is undefined (C++ will clear it up-front, python/upb will
   // clear it on destruction).  Nothing should rely on the python message
-  // during the lifetime of this object
+  // during the lifetime of this object.
   // User should not hold onto the returned PythonMessageMutator while
-  // calling back into Python
+  // calling back into Python.
   // Warning: there is a risk of deadlock with Python/C++ if users use the
   // returned message->GetDescriptor()->file->pool()
   virtual absl::StatusOr<PythonMessageMutator> GetClearedMessageMutator(
       PyObject* msg) const = 0;
 
+  // Returns a PythonConstMessagePointer. For upb and pure Python, it points
+  // to a new C++ message copied from the Python message. For the cpp
+  // extension, it points to the internal C++ message.
+  // User should not hold onto the returned PythonConstMessagePointer
+  // while calling back into Python.
+  virtual absl::StatusOr<PythonConstMessagePointer> GetConstMessagePointer(
+      PyObject* msg) const = 0;
+
   // If the passed object is a Python Message, returns its internal pointer.
   // Otherwise, returns NULL with an exception set.
+  // TODO: Remove deprecated GetMessagePointer().
+  [[deprecated(
+      "GetMessagePointer() only work with Cpp Extension, "
+      "please migrate to GetConstMessagePointer().")]]
   virtual const Message* GetMessagePointer(PyObject* msg) const = 0;
 
   // If the passed object is a Python Message, returns a mutable pointer.
@@ -95,6 +110,7 @@
   // This function will succeed only if there are no other Python objects
   // pointing to the message, like submessages or repeated containers.
   // With the current implementation, only empty messages are in this case.
+  // TODO: Remove deprecated GetMutableMessagePointer().
   [[deprecated(
       "GetMutableMessagePointer() only work with Cpp Extension, "
       "please migrate to GetClearedMessageMutator().")]]
@@ -156,6 +172,8 @@
   PythonMessageMutator CreatePythonMessageMutator(Message* owned_msg,
                                                   Message* msg,
                                                   PyObject* py_msg) const;
+  PythonConstMessagePointer CreatePythonConstMessagePointer(
+      Message* owned_msg, const Message* msg, PyObject* py_msg) const;
 };
 
 // User should not hold onto this object while calling back into Python
@@ -184,6 +202,37 @@
   PyObject* py_msg_;
 };
 
+// Read-only handle to the C++ message that corresponds to a Python message.
+// Holds a reference to the Python object from construction to destruction.
+// User should not hold onto this object while calling back into Python.
+class PythonConstMessagePointer {
+ public:
+  // Moves the handle; `other` is left empty. Declaring only the move
+  // constructor makes the class non-copyable.
+  PythonConstMessagePointer(PythonConstMessagePointer&& other);
+  ~PythonConstMessagePointer();
+
+  // Returns the wrapped message. Must not be called on a moved-from object,
+  // whose message pointer is null.
+  const Message& get() const { return *message_; }
+
+ private:
+  friend struct google::protobuf::python::PyProto_API;
+  // Private; PyProto_API::CreatePythonConstMessagePointer() is the factory.
+  PythonConstMessagePointer(Message* owned_msg, const Message* message,
+                            PyObject* py_msg);
+
+  friend PyObject* ::pymessage_mutate_const(PyObject* self, PyObject* args);
+  // Check if the const message has been changed.
+  bool NotChanged();
+  // C++ message owned by this object; NotChanged() compares against it.
+  std::unique_ptr<Message> owned_msg_;
+  // Message returned by get(); not deleted through this pointer.
+  const Message* message_;
+  // The Python message; a reference is held until destruction.
+  PyObject* py_msg_;
+};
+
 inline const char* PyProtoAPICapsuleName() {
   static const char kCapsuleName[] =
       "google.protobuf.pyext._message.proto_API";
diff --git a/python/google/protobuf/pyext/message_module.cc b/python/google/protobuf/pyext/message_module.cc
index c242f2b..f1d40c4 100644
--- a/python/google/protobuf/pyext/message_module.cc
+++ b/python/google/protobuf/pyext/message_module.cc
@@ -181,6 +181,70 @@
   return factory;
 }
 
+// Creates a new, empty C++ message of the same type as `py_msg`, identified
+// by `py_msg.DESCRIPTOR.full_name`. The generated pool is tried first; if the
+// type is not there, FindMessageDescriptor() is called with `DESCRIPTOR.file`
+// and the message comes from the factory returned by GetFactory().
+// Returns a message owned by the caller, or an error status. Every Python
+// reference acquired here is released on every return path.
+// NOTE(review): when an attribute lookup or the UTF-8 conversion fails, the
+// Python exception it raised is left set for the caller.
+absl::StatusOr<google::protobuf::Message*> CreateNewMessage(PyObject* py_msg) {
+  PyObject* pyd = PyObject_GetAttrString(py_msg, "DESCRIPTOR");
+  if (pyd == nullptr) {
+    return absl::InvalidArgumentError("py_msg has no attribute 'DESCRIPTOR'");
+  }
+
+  PyObject* fn = PyObject_GetAttrString(pyd, "full_name");
+  if (fn == nullptr) {
+    Py_DECREF(pyd);
+    return absl::InvalidArgumentError(
+        "DESCRIPTOR has no attribute 'full_name'");
+  }
+
+  // The returned buffer belongs to `fn`, so `fn` must stay alive while the
+  // name is in use.
+  const char* descriptor_full_name = PyUnicode_AsUTF8(fn);
+  if (descriptor_full_name == nullptr) {
+    Py_DECREF(fn);
+    Py_DECREF(pyd);
+    return absl::InternalError("Fail to convert descriptor full name");
+  }
+
+  PyObject* pyfile = PyObject_GetAttrString(pyd, "file");
+  Py_DECREF(pyd);
+  if (pyfile == nullptr) {
+    Py_DECREF(fn);
+    return absl::InvalidArgumentError("DESCRIPTOR has no attribute 'file'");
+  }
+  auto gen_d = google::protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName(
+      descriptor_full_name);
+  if (gen_d) {
+    Py_DECREF(pyfile);
+    Py_DECREF(fn);
+    return google::protobuf::MessageFactory::generated_factory()
+        ->GetPrototype(gen_d)
+        ->New();
+  }
+  auto d = FindMessageDescriptor(pyfile, descriptor_full_name);
+  Py_DECREF(pyfile);
+  Py_DECREF(fn);
+  RETURN_IF_ERROR(d.status());
+  return GetFactory()->GetPrototype(*d)->New();
+}
+
+// Stores in `*copy` a new message of the same type as `message`, filled by
+// serializing `message` and parsing the bytes back. The caller owns `*copy`.
+// NOTE(review): the parse result is ignored and the function always returns
+// true; presumably so that the call can be wrapped in ABSL_DCHECK. Confirm.
+bool CopyToOwnedMsg(google::protobuf::Message** copy, const google::protobuf::Message& message) {
+  *copy = message.New();
+  std::string wire;
+  message.SerializeToString(&wire);
+  (*copy)->ParseFromArray(wire.data(), static_cast<int>(wire.size()));
+  return true;
+}
+
 // C++ API.  Clients get at this via proto_api.h
 struct ApiImplementation : google::protobuf::python::PyProto_API {
   absl::StatusOr<google::protobuf::python::PythonMessageMutator> GetClearedMessageMutator(
@@ -188,36 +234,60 @@
     if (PyObject_TypeCheck(py_msg, google::protobuf::python::CMessage_Type)) {
       google::protobuf::Message* message =
           google::protobuf::python::PyMessage_GetMutableMessagePointer(py_msg);
+      if (message == nullptr) {
+        return absl::InternalError(
+            "Fail to get message pointer. The message "
+            "may already had a reference.");
+      }
       message->Clear();
       return CreatePythonMessageMutator(nullptr, message, py_msg);
     }
-    PyObject* pyd = PyObject_GetAttrString(py_msg, "DESCRIPTOR");
-    if (pyd == nullptr) {
-      return absl::InvalidArgumentError("py_msg has no attribute 'DESCRIPTOR'");
-    }
 
-    PyObject* fn = PyObject_GetAttrString(pyd, "full_name");
-    if (fn == nullptr) {
-      return absl::InvalidArgumentError(
-          "DESCRIPTOR has no attribute 'full_name'");
-    }
+    auto msg = CreateNewMessage(py_msg);
+    RETURN_IF_ERROR(msg.status());
+    return CreatePythonMessageMutator(*msg, *msg, py_msg);
+  }
 
-    const char* descriptor_full_name = PyUnicode_AsUTF8(fn);
-    if (descriptor_full_name == nullptr) {
-      return absl::InternalError("Fail to convert descriptor full name");
+  // Returns a read-only handle for `py_msg`. For a CMessage the handle points
+  // at the message returned by PyMessage_GetMessagePointer(). For any other
+  // object a new C++ message is created with CreateNewMessage() and filled
+  // by parsing the bytes returned from `py_msg.SerializeToString()`.
+  absl::StatusOr<google::protobuf::python::PythonConstMessagePointer>
+  GetConstMessagePointer(PyObject* py_msg) const override {
+    if (PyObject_TypeCheck(py_msg, google::protobuf::python::CMessage_Type)) {
+      const google::protobuf::Message* message =
+          google::protobuf::python::PyMessage_GetMessagePointer(py_msg);
+      google::protobuf::Message* owned_msg = nullptr;
+      // NOTE(review): ABSL_DCHECK evaluates its argument only when DCHECKs are
+      // enabled, so the snapshot copy is made only in those builds and
+      // `owned_msg` stays null otherwise. Presumably intentional; confirm.
+      ABSL_DCHECK(CopyToOwnedMsg(&owned_msg, *message));
+      return CreatePythonConstMessagePointer(owned_msg, message, py_msg);
     }
-
-    PyObject* pyfile = PyObject_GetAttrString(pyd, "file");
-    Py_DECREF(pyd);
-    if (pyfile == nullptr) {
-      return absl::InvalidArgumentError("DESCRIPTOR has no attribute 'file'");
+    auto msg = CreateNewMessage(py_msg);
+    RETURN_IF_ERROR(msg.status());
+    PyObject* serialized_pb(
+        PyObject_CallMethod(py_msg, "SerializeToString", nullptr));
+    if (serialized_pb == nullptr) {
+      delete *msg;  // Not yet handed to the result; avoid leaking it.
+      return absl::InternalError("Fail to serialize py_msg");
     }
-    auto d = FindMessageDescriptor(pyfile, descriptor_full_name);
-    Py_DECREF(pyfile);
-    RETURN_IF_ERROR(d.status());
-    Py_DECREF(fn);
-    google::protobuf::Message* msg = GetFactory()->GetPrototype(*d)->New();
-    return CreatePythonMessageMutator(msg, msg, py_msg);
+    char* data;
+    Py_ssize_t len;
+    if (PyBytes_AsStringAndSize(serialized_pb, &data, &len) < 0) {
+      Py_DECREF(serialized_pb);
+      delete *msg;
+      return absl::InternalError(
+          "Fail to get bytes from py_msg serialized data");
+    }
+    if (!(*msg)->ParseFromArray(data, static_cast<int>(len))) {
+      Py_DECREF(serialized_pb);
+      delete *msg;
+      return absl::InternalError(
+          "Couldn't parse py_message to google::protobuf::Message*!");
+    }
+    Py_DECREF(serialized_pb);
+    return CreatePythonConstMessagePointer(*msg, *msg, py_msg);
   }
 
   const google::protobuf::Message* GetMessagePointer(PyObject* msg) const override {