blob: 3f76052a05cd46cc9c0d68b0e084887143f3a783 [file] [log] [blame]
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include <Python.h>
// clang-format: do not reorder the above include.
#include "python/riegeli/bytes/python_writer.h"
// clang-format: do not reorder the above include.
#include <stddef.h>
#include <limits>
#include <memory>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/base.h"
#include "riegeli/bytes/buffered_writer.h"
namespace riegeli {
namespace python {
// Creates a writer which writes to the Python file-like object `dest`.
//
// A new reference to `dest` is taken and stored in `dest_`. The buffer size and
// the `owns_dest_` flag come from `options`.
//
// Initial position:
//  * If `options.assumed_pos()` is set, it is used as is and `dest` is not
//    queried.
//  * Otherwise `dest.seekable()` is called. If it is false, the position is
//    left untouched (assumed to be 0). If it is true, the position is read
//    with `dest.tell()` and `supports_random_access_` is set.
//
// Every failing Python call is reported through `FailOperation()`, which
// consumes the pending Python exception. The Python lock must already be held.
PythonWriter::PythonWriter(PyObject* dest, Options options)
    : BufferedWriter(options.buffer_size()), owns_dest_(options.owns_dest()) {
  PythonLock::AssertHeld();
  // `dest` is borrowed from the caller; take a new reference for `dest_`.
  Py_INCREF(dest);
  dest_.reset(dest);
  if (options.assumed_pos() != absl::nullopt) {
    set_start_pos(*options.assumed_pos());
    return;
  }
  static constexpr Identifier id_seekable("seekable");
  const PythonPtr seekable_result(
      PyObject_CallMethodObjArgs(dest_.get(), id_seekable.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seekable_result == nullptr)) {
    FailOperation("seekable()");
    return;
  }
  const int seekable_is_true = PyObject_IsTrue(seekable_result.get());
  if (ABSL_PREDICT_FALSE(seekable_is_true < 0)) {
    // `PyObject_IsTrue()` raised. Report it like every other failure here
    // instead of leaving the exception pending on a writer which looks healthy.
    FailOperation("PyObject_IsTrue() after seekable()");
    return;
  }
  if (seekable_is_true == 0) {
    // Random access is not supported. Assume 0 as the initial position.
    return;
  }
  static constexpr Identifier id_tell("tell");
  const PythonPtr tell_result(
      PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr));
  if (ABSL_PREDICT_FALSE(tell_result == nullptr)) {
    FailOperation("tell()");
    return;
  }
  const absl::optional<Position> file_pos =
      PositionFromPython(tell_result.get());
  if (ABSL_PREDICT_FALSE(file_pos == absl::nullopt)) {
    FailOperation("PositionFromPython() after tell()");
    return;
  }
  set_start_pos(*file_pos);
  supports_random_access_ = true;
}
// Finishes the writer: runs `BufferedWriter::Done()` first, then calls
// `dest.close()` if the destination is owned and still present. An exception
// raised by `close()` is reported through `FailOperation()`.
void PythonWriter::Done() {
  BufferedWriter::Done();
  // Nothing to close when the destination is not owned or is absent.
  if (!owns_dest_ || dest_ == nullptr) return;
  PythonLock lock;
  static constexpr Identifier id_close("close");
  const PythonPtr close_result(
      PyObject_CallMethodObjArgs(dest_.get(), id_close.get(), nullptr));
  if (ABSL_PREDICT_FALSE(close_result == nullptr)) {
    FailOperation("close()");
  }
}
// Records the failure of `operation`, consuming the pending Python exception.
//
// Preconditions (asserted): the object is open and the Python lock is held.
//
// If the writer is still healthy, the exception is fetched into `exception_`
// and the writer is failed with an `UnknownError` whose message is
// "<operation> failed: <exception message>"; the result of `Fail()` is
// returned. If the writer has already failed, the new exception is cleared and
// `false` is returned, so that the first failure is the one kept.
bool PythonWriter::FailOperation(absl::string_view operation) {
  RIEGELI_ASSERT(is_open())
      << "Failed precondition of PythonWriter::FailOperation(): "
         "Object closed";
  PythonLock::AssertHeld();
  if (ABSL_PREDICT_TRUE(healthy())) {
    exception_ = Exception::Fetch();
    const std::string message =
        absl::StrCat(operation, " failed: ", exception_.message());
    return Fail(absl::UnknownError(message));
  }
  // Ignore this error because `PythonWriter` already failed.
  PyErr_Clear();
  return false;
}
// Writes all of `src` to `dest_` by calling its Python `write()` method,
// repeating the call until nothing is left.
//
// Preconditions (asserted): `src` is not empty and the writer is healthy.
//
// Returns `true` once all of `src` has been written. Otherwise returns the
// result of `FailOverflow()` (the position would exceed the `Position` range),
// `FailOperation()` (a Python call raised or a conversion failed), or `Fail()`
// (`write()` reported more bytes than were passed to it).
bool PythonWriter::WriteInternal(absl::string_view src) {
RIEGELI_ASSERT(!src.empty())
<< "Failed precondition of BufferedWriter::WriteInternal(): "
"nothing to write";
RIEGELI_ASSERT(healthy())
<< "Failed precondition of BufferedWriter::WriteInternal(): " << status();
// Refuse the write if `start_pos() + src.size()` would not fit in `Position`.
if (ABSL_PREDICT_FALSE(src.size() >
std::numeric_limits<Position>::max() - start_pos())) {
return FailOverflow();
}
// A `PythonLock` is held for all calls into Python below.
PythonLock lock;
// Look up `dest.write` on first use and cache it in `write_function_`.
if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) {
static constexpr Identifier id_write("write");
write_function_.reset(PyObject_GetAttr(dest_.get(), id_write.get()));
if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) {
return FailOperation("write()");
}
}
// Each iteration passes a prefix of `src` to `write()` and then drops as many
// bytes from `src` as were reported written, so short writes are retried with
// the remainder.
do {
size_t length_written;
{
// A single call is limited to what a `Py_ssize_t` length can express.
const size_t length_to_write = UnsignedMin(
src.size(), size_t{std::numeric_limits<Py_ssize_t>::max()});
PythonPtr write_result;
if (!use_bytes_) {
// Prefer passing a `memoryview` to avoid copying memory.
MemoryView memory_view;
PyObject* const memory_view_object = memory_view.ToPython(
absl::string_view(src.data(), length_to_write));
if (ABSL_PREDICT_FALSE(memory_view_object == nullptr)) {
return FailOperation("MemoryView::ToPython()");
}
write_result.reset(PyObject_CallFunctionObjArgs(
write_function_.get(), memory_view_object, nullptr));
if (ABSL_PREDICT_FALSE(write_result == nullptr)) {
// Only a `TypeError` triggers the fallback to `bytes`; any other
// exception fails the writer.
if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
return FailOperation("write()");
}
// Drop the `TypeError` and remember the decision in `use_bytes_`, so
// that the `bytes` path below runs for this chunk.
PyErr_Clear();
use_bytes_ = true;
}
// Released explicitly after the call, both when `write()` succeeded and
// when it raised `TypeError`.
if (ABSL_PREDICT_FALSE(!memory_view.Release())) {
return FailOperation("MemoryView::Release()");
}
}
// Deliberately not an `else`: this also runs right after the fallback above
// has set `use_bytes_`.
if (use_bytes_) {
// `write()` does not support `memoryview`. Use `bytes`.
const PythonPtr bytes = BytesToPython(src.substr(0, length_to_write));
if (ABSL_PREDICT_FALSE(bytes == nullptr)) {
return FailOperation("BytesToPython()");
}
write_result.reset(PyObject_CallFunctionObjArgs(write_function_.get(),
bytes.get(), nullptr));
if (ABSL_PREDICT_FALSE(write_result == nullptr)) {
return FailOperation("write()");
}
}
if (write_result.get() == Py_None) {
// Python2 `file.write()` returns `None`, and would raise an exception
// if less than the full length had been written. Python2 is dead, but
// some classes still behave like that.
length_written = length_to_write;
} else {
// `io.IOBase.write()` returns the length written.
const absl::optional<size_t> length_written_opt =
SizeFromPython(write_result.get());
if (ABSL_PREDICT_FALSE(length_written_opt == absl::nullopt)) {
return FailOperation("SizeFromPython() after write()");
}
length_written = *length_written_opt;
}
}
// Sanity check on the value reported by Python before it is used to advance
// the position and to shrink `src`.
if (ABSL_PREDICT_FALSE(length_written > src.size())) {
return Fail(absl::InternalError("write() wrote more than requested"));
}
move_start_pos(length_written);
src.remove_prefix(length_written);
} while (!src.empty());
return true;
}
// Flushes buffered data through `BufferedWriter::FlushImpl()` and then, when
// required by `flush_type`, calls `dest.flush()`.
//
// `dest.flush()` is skipped only for `FlushType::kFromObject` when the
// destination is not owned. Returns `false` if the base flush fails, the result
// of `FailOperation()` if `dest.flush()` raises, and `true` otherwise.
bool PythonWriter::FlushImpl(FlushType flush_type) {
  if (ABSL_PREDICT_FALSE(!BufferedWriter::FlushImpl(flush_type))) return false;
  // Flushing only as far as this object does not reach into a destination
  // which is not owned.
  if (flush_type == FlushType::kFromObject && !owns_dest_) return true;
  switch (flush_type) {
    case FlushType::kFromObject:
    case FlushType::kFromProcess:
    case FlushType::kFromMachine: {
      PythonLock lock;
      static constexpr Identifier id_flush("flush");
      const PythonPtr flush_result(
          PyObject_CallMethodObjArgs(dest_.get(), id_flush.get(), nullptr));
      if (ABSL_PREDICT_FALSE(flush_result == nullptr)) {
        return FailOperation("flush()");
      }
      return true;
    }
  }
  RIEGELI_ASSERT_UNREACHABLE()
      << "Unknown flush type: " << static_cast<int>(flush_type);
}
// Moves the position of `dest_` to `new_pos`.
//
// Precondition (asserted): the buffer is empty.
//
// Fails with `UnimplementedError` if random access is not supported, and
// returns `false` without touching Python if the writer has already failed.
//
// When `new_pos` is not before the current position, the size of the
// destination is checked first. If `new_pos` is past the end, the position is
// left at the end (where `SizeInternal()` has moved it) and `false` is
// returned without failing the writer.
//
// Returns `true` if `dest.seek(new_pos)` succeeded; otherwise the result of
// `FailOperation()`.
bool PythonWriter::SeekBehindBuffer(Position new_pos) {
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::SeekBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!supports_random_access_)) {
    return Fail(absl::UnimplementedError("PythonWriter::Seek() not supported"));
  }
  // `SizeInternal()` requires a healthy writer, and a failed writer must not
  // report a successful seek. This matches the guards in `SizeBehindBuffer()`
  // and `TruncateBehindBuffer()`.
  if (ABSL_PREDICT_FALSE(!healthy())) return false;
  PythonLock lock;
  if (new_pos >= start_pos()) {
    // Seeking forwards.
    const absl::optional<Position> size = SizeInternal();
    if (ABSL_PREDICT_FALSE(size == absl::nullopt)) return false;
    if (ABSL_PREDICT_FALSE(new_pos > *size)) {
      // File ends.
      set_start_pos(*size);
      return false;
    }
  }
  set_start_pos(new_pos);
  const PythonPtr file_pos = PositionToPython(start_pos());
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    return FailOperation("PositionToPython()");
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr seek_result(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), file_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
    return FailOperation("seek()");
  }
  return true;
}
// Determines the size of the destination by seeking to its end.
//
// Preconditions (asserted): the writer is healthy, random access is supported,
// the buffer is empty, and the Python lock is held.
//
// Calls `dest.seek(0, 2)`. If that returns `None`, the position is obtained
// with a following `dest.tell()`. Returns the resulting position, or
// `absl::nullopt` after `FailOperation()` if any step fails. The position of
// `dest_` is left wherever `seek()` put it; callers restore it themselves.
inline absl::optional<Position> PythonWriter::SizeInternal() {
  RIEGELI_ASSERT(healthy())
      << "Failed precondition of PythonWriter::SizeInternal(): " << status();
  RIEGELI_ASSERT(supports_random_access_)
      << "Failed precondition of PythonWriter::SizeInternal(): "
         "random access not supported";
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of PythonWriter::SizeInternal(): "
         "buffer not empty";
  PythonLock::AssertHeld();

  const PythonPtr offset = PositionToPython(0);
  if (ABSL_PREDICT_FALSE(offset == nullptr)) {
    FailOperation("PositionToPython()");
    return absl::nullopt;
  }
  const PythonPtr seek_end = IntToPython(2);  // `io.SEEK_END`
  if (ABSL_PREDICT_FALSE(seek_end == nullptr)) {
    FailOperation("IntToPython()");
    return absl::nullopt;
  }

  static constexpr Identifier id_seek("seek");
  PythonPtr end_pos(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), offset.get(), seek_end.get(), nullptr));
  // `io.IOBase.seek()` returns the new position, so by default the value comes
  // from `seek()`.
  absl::string_view operation = "seek()";
  if (end_pos.get() == Py_None) {
    // Python2 `file.seek()` returns `None`. Python2 is dead, but some classes
    // still behave like that.
    static constexpr Identifier id_tell("tell");
    end_pos.reset(
        PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr));
    operation = "tell()";
  }
  if (ABSL_PREDICT_FALSE(end_pos == nullptr)) {
    FailOperation(operation);
    return absl::nullopt;
  }

  const absl::optional<Position> size = PositionFromPython(end_pos.get());
  if (ABSL_PREDICT_FALSE(size == absl::nullopt)) {
    FailOperation(absl::StrCat("PositionFromPython() after ", operation));
    return absl::nullopt;
  }
  return size;
}
// Returns the size of the destination, leaving its position at `start_pos()`.
//
// Precondition (asserted): the buffer is empty.
//
// Returns `absl::nullopt` if the writer has already failed, if random access is
// not supported (the writer is then failed with `UnimplementedError`), or if a
// Python call fails (reported through `FailOperation()`).
absl::optional<Position> PythonWriter::SizeBehindBuffer() {
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::SizeBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!healthy())) return absl::nullopt;
  if (ABSL_PREDICT_FALSE(!supports_random_access_)) {
    Fail(absl::UnimplementedError("PythonWriter::Size() not supported"));
    return absl::nullopt;
  }
  PythonLock lock;
  const absl::optional<Position> dest_size = SizeInternal();
  if (ABSL_PREDICT_FALSE(!dest_size.has_value())) return absl::nullopt;

  // `SizeInternal()` moved the destination to its end; seek back to where
  // writing should continue.
  const PythonPtr restore_pos = PositionToPython(start_pos());
  if (ABSL_PREDICT_FALSE(restore_pos == nullptr)) {
    FailOperation("PositionToPython()");
    return absl::nullopt;
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr restore_result(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), restore_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(restore_result == nullptr)) {
    FailOperation("seek()");
    return absl::nullopt;
  }
  return dest_size;
}
// Truncates the destination to `new_size` and leaves the position there.
//
// Precondition (asserted): the buffer is empty.
//
// Returns `false` if the writer has already failed, and fails with
// `UnimplementedError` if random access is not supported. If `new_size` exceeds
// the current size, the position is set to the current size and `false` is
// returned without failing the writer. Otherwise seeks to `new_size` and calls
// `dest.truncate()` with no arguments; a Python failure is reported through
// `FailOperation()`.
bool PythonWriter::TruncateBehindBuffer(Position new_size) {
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::TruncateBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!healthy())) return false;
  if (ABSL_PREDICT_FALSE(!supports_random_access_)) {
    return Fail(
        absl::UnimplementedError("PythonWriter::Truncate() not supported"));
  }
  PythonLock lock;
  const absl::optional<Position> current_size = SizeInternal();
  if (ABSL_PREDICT_FALSE(!current_size.has_value())) return false;
  if (ABSL_PREDICT_FALSE(*current_size < new_size)) {
    // File ends.
    set_start_pos(*current_size);
    return false;
  }
  {
    // Move to the requested size first; `truncate()` is called below without
    // an explicit size argument.
    const PythonPtr target_pos = PositionToPython(new_size);
    if (ABSL_PREDICT_FALSE(target_pos == nullptr)) {
      return FailOperation("PositionToPython()");
    }
    static constexpr Identifier id_seek("seek");
    const PythonPtr seek_result(PyObject_CallMethodObjArgs(
        dest_.get(), id_seek.get(), target_pos.get(), nullptr));
    if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
      return FailOperation("seek()");
    }
  }
  set_start_pos(new_size);
  static constexpr Identifier id_truncate("truncate");
  const PythonPtr truncate_result(
      PyObject_CallMethodObjArgs(dest_.get(), id_truncate.get(), nullptr));
  if (ABSL_PREDICT_FALSE(truncate_result == nullptr)) {
    return FailOperation("truncate()");
  }
  return true;
}
} // namespace python
} // namespace riegeli