kotlin-native/runtime/src/main/cpp/KString.cpp - third_party/github/JetBrains/kotlin - Git at Google

 /*
  * Copyright 2010-2017 JetBrains s.r.o.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <cstdio>
 #include <cstdlib>
 #include <limits>
 #include <string.h>
 #include <string>
 #include <optional>

 #include "KAssert.h"
 #include "Exceptions.h"
 #include "Memory.h"
 #include "Natives.h"
 #include "KString.h"
 #include "Porting.h"
 #include "Types.h"

 #include "utf8.h"

 #include "polyhash/PolyHash.h"
 #include "polyhash/naive.h"

 using namespace kotlin;

 namespace {

 // Upper bound used by this file for string lengths in characters:
 // the largest value of a signed 32-bit integer, stored as an unsigned 32-bit value.
 static constexpr uint32_t MAX_STRING_SIZE{static_cast<uint32_t>(std::numeric_limits<int32_t>::max())};

 template <StringEncoding encoding>
 static constexpr const bool isFixedLengthEncoding =
     std::is_convertible_v<StringData<encoding>&, FixedLengthUnitStringData<encoding, typename StringData<encoding>::unit>&>;

 // Size in bytes of one storage unit of `encoding`.
 // Returns 0 for any encoding other than UTF-16 and Latin-1.
 size_t encodingUnitSize(StringEncoding encoding) {
     if (encoding == StringEncoding::kUTF16) {
         return sizeof(StringData<StringEncoding::kUTF16>::unit);
     }
     if (encoding == StringEncoding::kLatin1) {
         return sizeof(StringData<StringEncoding::kLatin1>::unit);
     }
     return 0;
 }

 // Runs `impl` on a StringData view whose static type matches the runtime encoding
 // stored in the header of `string`, and returns whatever `impl` returns.
 // Calls ThrowIllegalArgumentException() for any encoding other than UTF-16 and Latin-1.
 template <typename F /* = R(StringData<*>) */>
 auto encodingAware(KConstRef string, F&& impl) {
     switch (auto header = StringHeader::of(string); header->encoding()) {
     case StringEncoding::kLatin1: return impl(StringData<StringEncoding::kLatin1>(header));
     case StringEncoding::kUTF16: return impl(StringData<StringEncoding::kUTF16>(header));
     default: ThrowIllegalArgumentException();
     }
 }

 // Two-string variant: resolves the encoding of `string1`, then of `string2`,
 // and calls `impl` with both typed views.
 template <typename F /* = R(StringData<*>, StringData<*>) */>
 auto encodingAware(KConstRef string1, KConstRef string2, F&& impl) {
     return encodingAware(string1, [&](auto first) {
         return encodingAware(string2, [&](auto second) { return impl(first, second); });
     });
 }

 // Returns true when `unit & maskT` is zero for every one of the `size` units at `data`.
 // `mask64` must be `maskT` repeated across a 64-bit word; it is used to test
 // sizeof(uint64_t) / sizeof(T) units at once on the 8-byte-aligned middle of the range.
 template <uint64_t maskT, uint64_t mask64, typename T>
 bool allZeroWhenMasked(const T* data, size_t size) {
     constexpr size_t unitsPerWord = sizeof(uint64_t) / sizeof(T);
     const T* const end = data + size;
     if (size >= unitsPerWord * 2) {
         // Number of leading units to check one by one before `data` reaches a word boundary.
         const size_t head = (unitsPerWord - reinterpret_cast<uintptr_t>(data) / sizeof(T)) % unitsPerWord;
         for (const T* const aligned = data + head; data != aligned; ++data) {
             if (*data & maskT) return false;
         }
         // now (uintptr_t)data % sizeof(uint64_t) == 0, so it's safe to cast
         for (; static_cast<size_t>(end - data) >= unitsPerWord; data += unitsPerWord) {
             if (*reinterpret_cast<const uint64_t*>(data) & mask64) return false;
         }
     }
     // Remaining tail (or the whole range when it is too short for the word loop).
     for (; data != end; ++data) {
         if (*data & maskT) return false;
     }
     return true;
 }

 // Reports whether none of the `lengthBytes` bytes at `utf8` has its top bit (0x80) set.
 bool utf8StringIsASCII(const char* utf8, size_t lengthBytes) {
     constexpr uint64_t highBit = 0x80;
     constexpr uint64_t highBitOfEveryByte = 0x8080808080808080ull;
     return allZeroWhenMasked<highBit, highBitOfEveryByte>(utf8, lengthBytes);
 }

 // Reports whether every one of the `lengthChars` 16-bit units at `utf16` has a zero upper byte.
 bool utf16StringIsLatin1(const uint16_t* utf16, size_t lengthChars) {
     constexpr uint64_t upperByte = 0xFF00;
     constexpr uint64_t upperByteOfEveryUnit = 0xFF00FF00FF00FF00ull;
     return allZeroWhenMasked<upperByte, upperByteOfEveryUnit>(utf16, lengthChars);
 }

 // True when `it` differs from the iterator that `string.at()` yields for the same
 // underlying pointer, i.e. `it` is not at the position `at()` reports for its unit.
 template <typename String, typename It>
 bool isInSurrogatePair(String&& string, It&& it) {
     auto unitStart = string.at(it.ptr());
     return unitStart != it;
 }

 // Allocates storage for a string of `sizeInUnits` units in `encoding` through `allocate`
 // and stores the computed flags in the new object's StringHeader.
 // `allocate` receives the required size expressed in KChar elements.
 template <typename Allocator /*= KRef(size_t sizeInChars) */>
 KRef allocateString(StringEncoding encoding, uint32_t sizeInUnits, Allocator&& allocate) {
     auto payloadBytes = sizeInUnits * encodingUnitSize(encoding);
     // An odd byte count does not fill the last KChar; IGNORE_LAST_BYTE is set in that case.
     auto encodingBits = static_cast<uint32_t>(encoding) << StringHeader::ENCODING_OFFSET;
     auto oddByteBit = StringHeader::IGNORE_LAST_BYTE * (payloadBytes % 2);
     auto flags = encodingBits | oddByteBit;
     // All strings are stored as KChar arrays regardless of the actual byte encoding
     auto arrayLength = (payloadBytes + StringHeader::extraLength(flags)) / sizeof(KChar);
     auto string = allocate(arrayLength);
     StringHeader::of(string)->flags_ = flags;
     return string;
 }

 // Allocates a zero-filled string object with std::calloc and tags it with
 // OBJECT_TAG_PERMANENT_CONTAINER.
 // NOTE(review): the std::calloc result is used without a null check.
 KRef allocatePermanentString(StringEncoding encoding, size_t sizeInUnits) {
     auto allocateWithCalloc = [](size_t sizeInChars) {
         const size_t totalBytes = sizeof(ArrayHeader) + sizeInChars * sizeof(KChar);
         auto object = reinterpret_cast<ObjHeader*>(std::calloc(totalBytes, 1));
         object->typeInfoOrMeta_ = setPointerBits((TypeInfo *)theStringTypeInfo, OBJECT_TAG_PERMANENT_CONTAINER);
         object->array()->count_ = sizeInChars;
         return object;
     };
     return allocateString(encoding, sizeInUnits, allocateWithCalloc);
 }

 // Creates a string of `lengthUnits` units in `encoding` and lets `initializer` fill its
 // unit buffer. A zero length yields TheEmptyString without calling `initializer`.
 template <StringEncoding encoding, typename F /*= void(UnitType*) */>
 OBJ_GETTER(createString, uint32_t lengthUnits, F&& initializer) {
     if (lengthUnits == 0) RETURN_RESULT_OF0(TheEmptyString);
     using Unit = typename StringData<encoding>::unit;
     auto string = CreateUninitializedString(encoding, lengthUnits, OBJ_RESULT);
     auto* units = reinterpret_cast<Unit*>(StringHeader::of(string)->data());
     initializer(units);
     return string;
 }

 // Builds a string from `lengthBytes` bytes of UTF-8 at `utf8`.
 //  - null input yields a null reference, empty input yields TheEmptyString;
 //  - input with no byte >= 0x80 is copied byte-for-byte into a Latin-1 string;
 //  - anything else is transcoded into a UTF-16 string.
 // With `ensureValid`, any exception from the length computation is turned into
 // ThrowCharacterCodingException(); otherwise the with_replacement routines are used.
 OBJ_GETTER(createStringFromUTF8, const char* utf8, uint32_t lengthBytes, bool ensureValid) {
     if (utf8 == nullptr) RETURN_OBJ(nullptr);
     if (lengthBytes == 0) RETURN_RESULT_OF0(TheEmptyString);

     if (utf8StringIsASCII(utf8, lengthBytes)) {
         RETURN_RESULT_OF(createString<StringEncoding::kLatin1>, lengthBytes,
             [=](uint8_t* out) { std::copy_n(utf8, lengthBytes, out); });
     }

     const char* const utf8End = utf8 + lengthBytes;
     size_t lengthChars;
     try {
         if (ensureValid) {
             lengthChars = utf8::utf16_length(utf8, utf8End);
         } else {
             lengthChars = utf8::with_replacement::utf16_length(utf8, utf8End);
         }
     } catch (...) {
         ThrowCharacterCodingException();
     }

     RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, lengthChars, [=](KChar* out) {
         return ensureValid
             ? utf8::unchecked::utf8to16(utf8, utf8End, out) // already known to be valid
             : utf8::with_replacement::utf8to16(utf8, utf8End, out);
     });
 }

 // Converts `size` characters of `thiz` starting at `start` to UTF-8 using conversion
 // `mode` and returns the bytes in a newly allocated byte array.
 template <KStringConversionMode mode>
 OBJ_GETTER(unsafeConvertToUTF8, KConstRef thiz, KInt start, KInt size) {
     const std::string encoded = kotlin::to_string<mode>(thiz, static_cast<size_t>(start), static_cast<size_t>(size));
     auto byteArray = AllocArrayInstance(theByteArrayTypeInfo, encoded.size(), OBJ_RESULT);
     auto* destination = ByteArrayAddressOfElementAt(byteArray->array(), 0);
     std::copy(encoded.begin(), encoded.end(), destination);
     return byteArray;
 }

 // Address of element `start` of the byte array `thiz`, viewed as `const char*`.
 // Only the array type is asserted; `start` is not range-checked here.
 const char* unsafeGetByteArrayData(KConstRef thiz, KInt start) {
     RuntimeAssert(thiz->type_info() == theByteArrayTypeInfo, "Must use a byte array");
     auto* element = ByteArrayAddressOfElementAt(thiz->array(), start);
     return reinterpret_cast<const char*>(element);
 }

 // Iterator to character `index` of `string`.
 // Calls ThrowArrayIndexOutOfBoundsException() when `index` is outside [0, sizeInChars()).
 template <typename T>
 PERFORMANCE_INLINE inline auto boundsCheckedIteratorAt(T string, KInt index) {
     // We couldn't have created a string bigger than max KInt value, so a negative index
     // becomes larger than any valid size once converted to unsigned; one comparison
     // therefore covers both bounds.
     const bool inBounds = static_cast<uint32_t>(index) < string.sizeInChars();
     if (!inBounds) {
         ThrowArrayIndexOutOfBoundsException();
     }
     return string.begin() + index;
 }

 } // namespace

 // Creates a string from a NUL-terminated C string by forwarding to CreateStringFromUtf8.
 // A null `cstring` is forwarded with length 0.
 extern "C" OBJ_GETTER(CreateStringFromCString, const char* cstring) {
     const auto lengthBytes = cstring ? strlen(cstring) : 0;
     RETURN_RESULT_OF(CreateStringFromUtf8, cstring, lengthBytes);
 }

 // Creates a string from `length` bytes of UTF-8 at `utf8`.
 // Passes ensureValid = false, so createStringFromUTF8 uses its with_replacement routines.
 extern "C" OBJ_GETTER(CreateStringFromUtf8, const char* utf8, uint32_t length) {
     RETURN_RESULT_OF(createStringFromUTF8, utf8, length, false);
 }

 // Creates a string from `length` bytes of UTF-8 at `utf8`.
 // Passes ensureValid = true, so createStringFromUTF8 calls ThrowCharacterCodingException()
 // when its length computation throws.
 extern "C" OBJ_GETTER(CreateStringFromUtf8OrThrow, const char* utf8, uint32_t length) {
     RETURN_RESULT_OF(createStringFromUTF8, utf8, length, true);
 }

 // Creates a UTF-16 string holding a copy of the `length` units at `utf16`.
 // A null `utf16` yields a null reference.
 extern "C" OBJ_GETTER(CreateStringFromUtf16, const KChar* utf16, uint32_t length) {
     if (utf16 == nullptr) RETURN_OBJ(nullptr);
     auto copyUnits = [=](KChar* out) { std::copy_n(utf16, length, out); };
     RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, length, copyUnits);
 }

 // Allocates a string of `length` units in `encoding` without filling in its contents.
 // A zero length yields TheEmptyString. The backing storage is obtained from
 // AllocArrayInstance with theStringTypeInfo; allocateString sets the header flags.
 extern "C" OBJ_GETTER(CreateUninitializedString, StringEncoding encoding, uint32_t length) {
     if (length == 0) RETURN_RESULT_OF0(TheEmptyString);
     return allocateString(encoding, length, [=](size_t sizeInChars) {
         RETURN_RESULT_OF(AllocArrayInstance, theStringTypeInfo, sizeInChars);
     });
 }

 // Converts `kref` to a NUL-terminated UTF-8 C string allocated with std::calloc.
 // The buffer is meant to be released with DisposeCString (which calls std::free).
 // Returns nullptr when `kref` is null or when the allocation fails.
 extern "C" char* CreateCStringFromString(KConstRef kref) {
     if (kref == nullptr) return nullptr;
     std::string utf8 = kotlin::to_string<KStringConversionMode::UNCHECKED>(kref);
     // calloc zero-fills the buffer, so the extra byte is already the NUL terminator.
     char* result = static_cast<char*>(std::calloc(1, utf8.size() + 1));
     // Do not copy through a null pointer if the allocation failed.
     if (result == nullptr) return nullptr;
     std::copy(utf8.begin(), utf8.end(), result);
     return result;
 }

 // Releases a buffer with std::free. A null pointer is accepted and ignored.
 extern "C" void DisposeCString(char* cstring) {
     // std::free does nothing for a null pointer, so no separate guard is needed.
     std::free(cstring);
 }

 // Creates an off-heap (std::calloc-backed) string from a NUL-terminated UTF-8 C string.
 // Input with no byte >= 0x80 is stored as Latin-1; anything else is transcoded to UTF-16
 // using the with_replacement routines.
 extern "C" KRef CreatePermanentStringFromCString(const char* nullTerminatedUTF8) {
     // Note: this function can be called in "Native" thread state. But this is fine:
     //   while it indeed manipulates Kotlin objects, it doesn't in fact access _Kotlin heap_,
     //   because the accessed object is off-heap, imitating permanent static objects.
     const auto sizeInBytes = strlen(nullTerminatedUTF8);

     if (!utf8StringIsASCII(nullTerminatedUTF8, sizeInBytes)) {
         const auto utf8End = nullTerminatedUTF8 + sizeInBytes;
         const auto sizeInChars = utf8::with_replacement::utf16_length(nullTerminatedUTF8, utf8End);
         auto utf16String = allocatePermanentString(StringEncoding::kUTF16, sizeInChars);
         utf8::with_replacement::utf8to16(nullTerminatedUTF8, utf8End, reinterpret_cast<KChar*>(StringHeader::of(utf16String)->data()));
         return utf16String;
     }

     auto latin1String = allocatePermanentString(StringEncoding::kLatin1, sizeInBytes);
     std::copy_n(nullTerminatedUTF8, sizeInBytes, StringHeader::of(latin1String)->data());
     return latin1String;
 }

 // Releases `header` with std::free after casting away constness.
 // NOTE(review): presumably only valid for objects from CreatePermanentStringFromCString,
 // which allocates with std::calloc — confirm against callers.
 extern "C" void FreePermanentStringForTests(KConstRef header) {
     std::free(const_cast<KRef>(header));
 }

 // String.kt
 // Returns the length of `thiz` in characters (sizeInChars() of its encoding-specific view).
 extern "C" KInt Kotlin_String_getStringLength(KConstRef thiz) {
     return encodingAware(thiz, [](auto thiz) { return thiz.sizeInChars(); });
 }

 // Produces a string in which every occurrence of `oldChar` is replaced by `newChar`.
 // When the receiver's encoding cannot represent `oldChar`, the receiver itself is returned.
 // The result keeps the receiver's encoding when that is a fixed-length non-UTF-16 encoding
 // able to represent `newChar`; otherwise the result is built as UTF-16.
 extern "C" OBJ_GETTER(Kotlin_String_replace, KConstRef thizPtr, KChar oldChar, KChar newChar) {
     return encodingAware(thizPtr, [=](auto thiz) {
         // `oldChar` cannot occur in a string of this encoding, so nothing would change.
         if (!thiz.canEncode(oldChar)) RETURN_OBJ(const_cast<KRef>(thizPtr));
         if constexpr (thiz.encoding != StringEncoding::kUTF16 && isFixedLengthEncoding<thiz.encoding>) {
             // Unit-wise replacement directly on the raw storage.
             if (thiz.canEncode(newChar)) {
                 RETURN_RESULT_OF(createString<thiz.encoding>, thiz.sizeInUnits(),
                     [=](auto* out) { std::replace_copy(thiz.begin().ptr(), thiz.end().ptr(), out, oldChar, newChar); })
             }
         }
         // Character-wise replacement through the decoding iterators into a UTF-16 result.
         RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, thiz.sizeInChars(),
             [=](KChar* out) { std::replace_copy(thiz.begin(), thiz.end(), out, oldChar, newChar); });
     });
 }

 // Concatenates `thiz` and `other`.
 // If either operand has size 0 the other operand is returned as-is.
 // Calls ThrowOutOfMemoryError() when the combined length in characters exceeds MAX_STRING_SIZE.
 // Operands sharing an encoding are joined unit-by-unit in that encoding; otherwise the
 // result is built as UTF-16 through the decoding iterators.
 extern "C" OBJ_GETTER(Kotlin_String_plusImpl, KConstRef thiz, KConstRef other) {
     if (StringHeader::of(thiz)->size() == 0) RETURN_OBJ(const_cast<KRef>(other));
     if (StringHeader::of(other)->size() == 0) RETURN_OBJ(const_cast<KRef>(thiz));
     return encodingAware(thiz, other, [=](auto thiz, auto other) {
         RuntimeAssert(thiz.sizeInChars() <= MAX_STRING_SIZE, "this cannot be this large");
         RuntimeAssert(other.sizeInChars() <= MAX_STRING_SIZE, "other cannot be this large");
         auto resultLength = thiz.sizeInChars() + other.sizeInChars(); // can't overflow since MAX_STRING_SIZE is (max value)/2
         if (resultLength > MAX_STRING_SIZE) {
             ThrowOutOfMemoryError();
         }

         // NOTE(review): the unit-count guard below compares against the size_t maximum, while
         // createString takes its length as uint32_t — confirm this is sufficient should a
         // variable-length encoding ever be added.
         if (thiz.encoding == other.encoding &&
             // In non-UTF-16 encodings, the total size in units could still overflow, e.g.
             // UTF-8 has characters that encode to 3 bytes while only needing 2 in UTF-16.
             (thiz.encoding == StringEncoding::kUTF16 || thiz.sizeInUnits() < std::numeric_limits<size_t>::max() - other.sizeInUnits())
         ) {
             RETURN_RESULT_OF(createString<thiz.encoding>, thiz.sizeInUnits() + other.sizeInUnits(), [=](auto* out) {
                 auto halfway = std::copy(thiz.begin().ptr(), thiz.end().ptr(), out);
                 std::copy(other.begin().ptr(), other.end().ptr(), halfway);
             });
         } else {
             RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, thiz.sizeInChars() + other.sizeInChars(), [=](KChar* out) {
                 auto halfway = std::copy(thiz.begin(), thiz.end(), out);
                 std::copy(other.begin(), other.end(), halfway);
             });
         }
     });
 }

 // Creates a string from `size` elements of the char array `thiz` starting at `start`.
 // The result is Latin-1 when every copied element has a zero upper byte, UTF-16 otherwise.
 // Only the array type is asserted; `start` and `size` are not range-checked here.
 extern "C" OBJ_GETTER(Kotlin_String_unsafeStringFromCharArray, KConstRef thiz, KInt start, KInt size) {
     RuntimeAssert(thiz->type_info() == theCharArrayTypeInfo, "Must use a char array");
     if (utf16StringIsLatin1(CharArrayAddressOfElementAt(thiz->array(), start), size)) {
         // The element address is recomputed inside the initializer, i.e. after the allocation.
         RETURN_RESULT_OF(createString<StringEncoding::kLatin1>, size,
             [=](uint8_t* out) { std::copy_n(CharArrayAddressOfElementAt(thiz->array(), start), size, out); });
     }
     RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, size,
         [=](KChar* out) { std::copy_n(CharArrayAddressOfElementAt(thiz->array(), start), size, out); });
 }

 // Copies `size` characters of `string`, starting at character `start`, into the char array
 // `destination` at `destinationOffset`, then returns `destination`.
 // No bounds are checked here.
 extern "C" OBJ_GETTER(Kotlin_String_toCharArray, KConstRef string, KRef destination, KInt destinationOffset, KInt start, KInt size) {
     encodingAware(string, [=](auto source) {
         auto from = source.begin() + start;
         auto to = CharArrayAddressOfElementAt(destination->array(), destinationOffset);
         if constexpr (source.encoding != StringEncoding::kUTF16) {
             // Go through the iterator so each unit is converted to a character.
             std::copy_n(from, size, to);
         } else {
             // UTF-16 storage can be copied from the raw unit pointer.
             std::copy_n(from.ptr(), size, to);
         }
     });
     RETURN_OBJ(destination);
 }

 // Produces the substring [startIndex, endIndex) of `thiz`, indices being in characters.
 // Calls ThrowArrayIndexOutOfBoundsException() when the range is invalid; an empty range
 // yields TheEmptyString. The result keeps the receiver's encoding unless either boundary
 // falls inside a surrogate pair, in which case it is built as UTF-16.
 extern "C" OBJ_GETTER(Kotlin_String_subSequence, KConstRef thiz, KInt startIndex, KInt endIndex) {
     return encodingAware(thiz, [=](auto thiz) {
         // The unsigned cast makes a negative endIndex compare greater than any valid size.
         if (startIndex < 0 || static_cast<uint32_t>(endIndex) > thiz.sizeInChars() || startIndex > endIndex) {
             // Kotlin/JVM uses StringIndexOutOfBounds, but Native doesn't have it and this is close enough.
             ThrowArrayIndexOutOfBoundsException();
         }

         if (startIndex == endIndex) {
             RETURN_RESULT_OF0(TheEmptyString);
         }

         auto start = thiz.begin() + startIndex;
         auto end = start + (endIndex - startIndex);
         // A boundary inside a surrogate pair cannot be expressed as a raw unit pointer,
         // so copy character by character into a UTF-16 string instead.
         if (isInSurrogatePair(thiz, start) || isInSurrogatePair(thiz, end)) {
             RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, endIndex - startIndex,
                 [=](KChar* out) { std::copy(start, end, out); });
         }
         // Both boundaries are on unit boundaries: copy the raw units in the same encoding.
         RETURN_RESULT_OF(createString<thiz.encoding>, end.ptr() - start.ptr(),
             [=](auto* out) { std::copy(start.ptr(), end.ptr(), out); });
     });
 }

 // Decides the ordering of two strings given the first position at which they differ.
 // Returns 0 when both iterators are at their ends, -1 / 1 when only the first / second
 // is, and otherwise -1 or 1 according to the characters at the given positions.
 template <typename It1, typename It2>
 static KInt Kotlin_String_compareAt(It1 it1, It1 end1, It2 it2, It2 end2) {
     const bool exhausted1 = it1 == end1;
     const bool exhausted2 = it2 == end2;
     if (exhausted1 || exhausted2) {
         if (exhausted1 && exhausted2) return 0;
         return exhausted1 ? -1 : 1;
     }

     KChar left = *it1;
     KChar right = *it2;
     if (left == right) {
         // Assuming the iterators were produced by std::mismatch, this is only possible
         // when searching in raw memory then rolling back to the previous unit in non-UTF-16
         // encodings. In this case this must be a surrogate pair where the first element is
         // equal, but the second element is not.
         ++it1;
         ++it2;
         left = *it1;
         right = *it2;
     }
     return left < right ? -1 : 1;
 }

 // Compares `thiz` with `other`; the result is 0, -1 or 1 as produced by
 // Kotlin_String_compareAt at the first point of difference.
 extern "C" KInt Kotlin_String_compareTo(KConstRef thiz, KConstRef other) {
     return encodingAware(thiz, other, [=](auto thiz, auto other) {
         auto begin1 = thiz.begin(), end1 = thiz.end();
         auto begin2 = other.begin(), end2 = other.end();
         if constexpr (thiz.encoding == other.encoding) {
             // Same encoding: find the first differing raw unit, then map the raw pointers
             // back to iterators with at().
             auto [ptr1, ptr2] = std::mismatch(begin1.ptr(), end1.ptr(), begin2.ptr(), end2.ptr());
             return Kotlin_String_compareAt(thiz.at(ptr1), end1, other.at(ptr2), end2);
         } else {
             // Different encodings: compare decoded characters through the iterators.
             auto [it1, it2] = std::mismatch(begin1, end1, begin2, end2);
             return Kotlin_String_compareAt(it1, end1, it2, end2);
         }
     });
 }

 // Returns the character at `index`; boundsCheckedIteratorAt calls
 // ThrowArrayIndexOutOfBoundsException() when `index` is out of range.
 extern "C" KChar Kotlin_String_get(KConstRef thiz, KInt index) {
     return encodingAware(thiz, [=](auto thiz) { return *boundsCheckedIteratorAt(thiz, index); });
 }

 // Creates a string from `size` bytes of the byte array `thiz` starting at `start`,
 // forwarding to CreateStringFromUtf8OrThrow. `start` and `size` are not range-checked here.
 extern "C" OBJ_GETTER(Kotlin_ByteArray_unsafeStringFromUtf8OrThrow, KConstRef thiz, KInt start, KInt size) {
     RETURN_RESULT_OF(CreateStringFromUtf8OrThrow, unsafeGetByteArrayData(thiz, start), size);
 }

 // Creates a string from `size` bytes of the byte array `thiz` starting at `start`,
 // forwarding to CreateStringFromUtf8. `start` and `size` are not range-checked here.
 extern "C" OBJ_GETTER(Kotlin_ByteArray_unsafeStringFromUtf8, KConstRef thiz, KInt start, KInt size) {
     RETURN_RESULT_OF(CreateStringFromUtf8, unsafeGetByteArrayData(thiz, start), size);
 }

 // Encodes `size` characters of `thiz` starting at `start` as UTF-8 into a new byte array,
 // using the REPLACE_INVALID conversion mode.
 extern "C" OBJ_GETTER(Kotlin_String_unsafeStringToUtf8, KConstRef thiz, KInt start, KInt size) {
     RETURN_RESULT_OF(unsafeConvertToUTF8<KStringConversionMode::REPLACE_INVALID>, thiz, start, size);
 }

 // Encodes `size` characters of `thiz` starting at `start` as UTF-8 into a new byte array,
 // using the CHECKED conversion mode.
 extern "C" OBJ_GETTER(Kotlin_String_unsafeStringToUtf8OrThrow, KConstRef thiz, KInt start, KInt size) {
     RETURN_RESULT_OF(unsafeConvertToUTF8<KStringConversionMode::CHECKED>, thiz, start, size);
 }

 // Returns the hash code of `thiz` if it is already known, or an empty optional otherwise.
 // A string of size 0 always reports 0. A stored hash of 0 only counts as known when the
 // HASHCODE_IS_ZERO flag is set in the header.
 static std::optional<KInt> Kotlin_String_cachedHashCode(KConstRef thiz) {
     auto header = StringHeader::of(thiz);
     if (header->size() == 0) return 0;

     auto hash = kotlin::std_support::atomic_ref{header->hashCode_}.load(std::memory_order_relaxed);
     if (hash != 0) return hash;

     // The flags are only consulted when the stored hash is zero.
     auto flags = kotlin::std_support::atomic_ref{header->flags_}.load(std::memory_order_relaxed);
     if (flags & StringHeader::HASHCODE_IS_ZERO) return hash;

     return std::nullopt;
 }

 // Content equality of `thiz` and `other`.
 // Returns false when `other` is null or not a string. When both hash codes are already
 // cached and differ, the strings are reported unequal without comparing contents.
 extern "C" KBoolean Kotlin_String_equals(KConstRef thiz, KConstRef other) {
     if (thiz == other) return true;
     if (other == nullptr || other->type_info() != theStringTypeInfo) return false;

     const auto thizHash = Kotlin_String_cachedHashCode(thiz);
     if (thizHash) {
         const auto otherHash = Kotlin_String_cachedHashCode(other);
         if (otherHash && *thizHash != *otherHash) return false;
     }

     return encodingAware(thiz, other, [=](auto lhs, auto rhs) {
         if constexpr (lhs.encoding != rhs.encoding) {
             // Different encodings: compare decoded characters.
             return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
         } else {
             // Same encoding: compare the raw units.
             return std::equal(lhs.begin().ptr(), lhs.end().ptr(), rhs.begin().ptr(), rhs.end().ptr());
         }
     });
 }

 // Bounds checks are performed on Kotlin side
 // Compares `length` characters of `thiz` starting at `thizOffset` with `length` characters
 // of `other` starting at `otherOffset`. A zero `length` is always equal.
 // No bounds are checked in this function.
 extern "C" KBoolean Kotlin_String_unsafeRangeEquals(KConstRef thiz, KInt thizOffset, KConstRef other, KInt otherOffset, KInt length) {
     return length == 0 || encodingAware(thiz, other, [=](auto thiz, auto other) {
         auto begin1 = thiz.begin() + thizOffset;
         auto begin2 = other.begin() + otherOffset;
         // Questionable moment: in variable-length encodings, is it more efficient to advance the iterator first
         // and then compare the known fixed range, or to decode characters one by one and count while comparing?
         auto end1 = begin1 + length;
         auto end2 = begin2 + length;
         if constexpr (thiz.encoding == other.encoding) {
             // Assuming only one "canonical" encoding, can byte-compare encoded values.
             // Since ptr() is only well-defined at unit boundaries, surrogates at ends should be checked separately.
             // Note: the comparisons below advance begin1/begin2 (and step end1/end2 back) as a side effect,
             // so the raw comparison at the end covers only whole units.
             bool startsWithUnequalLowSurrogate = isInSurrogatePair(thiz, begin1)
                 ? !isInSurrogatePair(other, begin2) || *begin1++ != *begin2++ // safe because length != 0
                 : isInSurrogatePair(other, begin2);
             if (startsWithUnequalLowSurrogate) return false;
             bool endsWithUnequalHighSurrogate = isInSurrogatePair(thiz, end1)
                 ? !isInSurrogatePair(other, end2) || *--end1 != *--end2 // safe because begin1 and begin2 are not in a surrogate pair
                 : isInSurrogatePair(other, end2);
             if (endsWithUnequalHighSurrogate) return false;
             return std::equal(begin1.ptr(), end1.ptr(), begin2.ptr(), end2.ptr());
         } else {
             // Different encodings: compare decoded characters.
             return std::equal(begin1, end1, begin2, end2);
         }
     });
 }

 // True for code units in 0x00..0x1F or 0x7F..0x9F.
 extern "C" KBoolean Kotlin_Char_isISOControl(KChar ch) {
     const bool inLowRange = ch <= 0x1F;
     const bool inHighRange = 0x7F <= ch && ch <= 0x9F;
     return inLowRange || inHighRange;
 }

 // True for code units in 0xD800..0xDBFF (top six bits equal to 110110).
 extern "C" KBoolean Kotlin_Char_isHighSurrogate(KChar ch) {
     return ch >= 0xd800 && ch <= 0xdbff;
 }

 // True for code units in 0xDC00..0xDFFF (top six bits equal to 110111).
 extern "C" KBoolean Kotlin_Char_isLowSurrogate(KChar ch) {
     return ch >= 0xdc00 && ch <= 0xdfff;
 }

 // Index of the first occurrence of `ch` in `thiz` at or after `fromIndex`, or -1.
 // A negative `fromIndex` is treated as 0; one past the end finds nothing.
 extern "C" KInt Kotlin_String_indexOfChar(KConstRef thiz, KChar ch, KInt fromIndex) {
     const size_t firstIndex = fromIndex > 0 ? static_cast<size_t>(fromIndex) : 0;
     return encodingAware(thiz, [=](auto string) {
         auto position = std::min(firstIndex, string.sizeInChars());
         auto cursor = string.begin() + position;
         while (position < string.sizeInChars()) {
             if (*cursor == ch) return static_cast<KInt>(position);
             ++cursor;
             ++position;
         }
         return -1;
     });
 }

 // Index of the last occurrence of `ch` in `thiz` at or before `fromIndex`, or -1.
 // A negative `fromIndex` finds nothing; one past the end is clamped to the last character.
 extern "C" KInt Kotlin_String_lastIndexOfChar(KConstRef thiz, KChar ch, KInt fromIndex) {
     if (fromIndex < 0) return -1;
     const size_t exclusiveBound = static_cast<size_t>(fromIndex) + 1; // convert to exclusive bound
     return encodingAware(thiz, [=](auto string) {
         auto remaining = std::min(exclusiveBound, string.sizeInChars());
         auto cursor = string.begin() + remaining;
         // Walk backwards; `remaining` is the index of the character being examined.
         while (remaining > 0) {
             --remaining;
             --cursor;
             if (*cursor == ch) return static_cast<KInt>(remaining);
         }
         return -1;
     });
 }

 // TODO: or code up Knuth-Morris-Pratt, or use std::boyer_moore_searcher (might need backporting)
 // Index (in characters) of the first occurrence of `other` in `thiz` at or after
 // `fromIndex`, or -1 when there is none. A negative `fromIndex` is treated as 0.
 // An empty `other` matches at `fromIndex`, clamped to the length of `thiz`.
 extern "C" KInt Kotlin_String_indexOfString(KConstRef thiz, KConstRef other, KInt fromIndex) {
     auto unsignedIndex = fromIndex < 0 ? 0 : static_cast<size_t>(fromIndex);
     return encodingAware(thiz, other, [=](auto thiz, auto other) {
         auto thizLength = thiz.sizeInChars();
         auto otherLength = other.sizeInChars();
         if (unsignedIndex >= thizLength) {
             return otherLength == 0 ? static_cast<KInt>(thizLength) : -1;
         } else if (otherLength > thizLength) {
             return -1;
         } else if (otherLength == 0) {
             return static_cast<KInt>(unsignedIndex);
         }

         auto start = thiz.begin() + unsignedIndex, end = thiz.end();
         auto patternStart = other.begin(), patternEnd = other.end();
         if constexpr (thiz.encoding == other.encoding) {
             // Same encoding: search the raw units, then verify that the hit starts on a
             // character boundary. `shift` is the character index that `start` corresponds to.
             auto shift = unsignedIndex;
             while (start != end) {
                 if (isInSurrogatePair(thiz, start)) {
                     // `start` points into a surrogate pair, skip its second half since presumably
                     // this encoding doesn't allow `other` to start with it anyway.
                     ++start;
                     ++shift;
                 }
                 auto ptr = std::search(start.ptr(), end.ptr(), patternStart.ptr(), patternEnd.ptr());
                 if (ptr == end.ptr()) break;
                 auto it = thiz.at(ptr);
                 if (ptr == it.ptr()) return static_cast<KInt>(it - start + shift);
                 // Found a bytewise match, but it starts in the middle of a unit, so it's not a character-wise match.
                 shift += it - start + 1;
                 start = ++it;
             }
             return -1;
         } else {
             // Different encodings: search over decoded characters.
             auto it = std::search(start, end, patternStart, patternEnd);
             return it == end ? -1 : static_cast<KInt>(it - start + unsignedIndex);
         }
     });
 }

 // Returns the hash code of `thiz`, computing it and caching it in the string header on
 // first use. A computed hash of 0 is recorded by setting HASHCODE_IS_ZERO in the flags
 // rather than by storing the value.
 extern "C" KInt Kotlin_String_hashCode(KRef thiz) {
     const auto cached = Kotlin_String_cachedHashCode(thiz);
     if (cached) {
         return *cached;
     }

     KInt computed = encodingAware(thiz, [](auto string) {
         if constexpr (!isFixedLengthEncoding<string.encoding>) {
             return polyHash_naive(string.begin(), string.end());
         } else {
             return polyHash(string.sizeInUnits(), string.begin().ptr());
         }
     });

     auto header = StringHeader::of(thiz);
     // Having exactly one write per computation allows them to be relaxed, since there's no need to order them with any other write.
     // Since most relevant platforms have atomic word-sized writes by default, this is theoretically much faster.
     if (computed == 0) {
         kotlin::std_support::atomic_ref{header->flags_}.fetch_or(StringHeader::HASHCODE_IS_ZERO, std::memory_order_relaxed);
     } else {
         kotlin::std_support::atomic_ref{header->hashCode_}.store(computed, std::memory_order_relaxed);
     }
     return computed;
 }

 // Pointer to the UTF-16 units of `message`.
 // Calls ThrowIllegalArgumentException() when the string is not UTF-16 encoded.
 extern "C" const KChar* Kotlin_String_utf16pointer(KConstRef message) {
     auto header = StringHeader::of(message);
     const bool isUtf16 = header->encoding() == StringEncoding::kUTF16;
     if (!isUtf16) ThrowIllegalArgumentException();
     return reinterpret_cast<const KChar*>(header->data());
 }

 // Returns header->size() for a UTF-16 encoded `message`.
 // Calls ThrowIllegalArgumentException() when the string is not UTF-16 encoded.
 // NOTE(review): elsewhere in this file header->data() is filled byte-wise; confirm that
 // header->size() is expressed in KChar units here rather than in bytes.
 extern "C" KInt Kotlin_String_utf16length(KConstRef message) {
     auto header = StringHeader::of(message);
     if (header->encoding() != StringEncoding::kUTF16) ThrowIllegalArgumentException();
     return header->size();
 }

 // Returns the raw address of the storage for the character at `index`;
 // boundsCheckedIteratorAt calls ThrowArrayIndexOutOfBoundsException() when `index` is out of range.
 extern "C" KConstNativePtr Kotlin_Arrays_getStringAddressOfElement(KConstRef thiz, KInt index) {
     return encodingAware(thiz, [=](auto thiz) { return reinterpret_cast<KConstNativePtr>(boundsCheckedIteratorAt(thiz, index).ptr()); });
 }

 // Converts the UTF-16 range [it, end) to a UTF-8 std::string according to `mode`.
 // In CHECKED mode any exception from the converter is turned into ThrowCharacterCodingException().
 template <KStringConversionMode mode>
 static std::string to_string_impl(const KChar* it, const KChar* end) noexcept(mode != KStringConversionMode::CHECKED) {
     std::string utf8;
     // One byte per input unit is reserved up front.
     utf8.reserve(end - it);
     if constexpr (mode == KStringConversionMode::UNCHECKED) {
         utf8::unchecked::utf16to8(it, end, back_inserter(utf8));
     } else if constexpr (mode == KStringConversionMode::CHECKED) {
         try {
             utf8::utf16to8(it, end, back_inserter(utf8));
         } catch (...) {
             ThrowCharacterCodingException();
         }
     } else if constexpr (mode == KStringConversionMode::REPLACE_INVALID) {
         utf8::with_replacement::utf16to8(it, end, back_inserter(utf8));
     }
     return utf8;
 }

 // Converts the Latin-1 range [it, end) to a UTF-8 std::string.
 // Bytes below 0x80 are copied unchanged; every other byte becomes a two-byte sequence.
 // The conversion mode is unused.
 template <KStringConversionMode>
 static std::string to_string_impl(const uint8_t* it, const uint8_t* end) noexcept {
     // Each byte with the top bit set needs one extra output byte.
     const auto twoByteCount = std::count_if(it, end, [](uint8_t c) { return c & 0x80; });
     std::string result;
     result.resize((end - it) + twoByteCount);
     auto out = result.begin();
     for (; it != end; ++it) {
         const uint8_t latin1 = *it;
         if ((latin1 & 0x80) == 0) {
             *out++ = latin1;
             continue;
         }
         // Lead byte carries the top two bits, continuation byte the low six.
         *out++ = 0xC0 | (latin1 >> 6);
         *out++ = 0x80 | (latin1 & 0x3F);
     }
     return result;
 }

 // Converts `size` characters of `kstring`, starting at character `start`, to a UTF-8
 // std::string using conversion `mode`. Passing std::string::npos as `size` converts up to
 // the end of the string. The range is only validated with RuntimeAssert.
 template <KStringConversionMode mode>
 std::string kotlin::to_string(KConstRef kstring, size_t start, size_t size) noexcept(mode != KStringConversionMode::CHECKED) {
     return encodingAware(kstring, [=](auto kstring) {
         auto length = kstring.sizeInChars();
         RuntimeAssert(start <= length, "start index out of bounds");
         auto it = kstring.begin() + start;
         auto end = kstring.end();
         if (size != std::string::npos) {
             RuntimeAssert(size <= length - start, "size out of bounds");
             end = it + size;
         }
         // The unit type of the raw pointers selects the matching to_string_impl overload.
         return to_string_impl<mode>(it.ptr(), end.ptr());
     });
 }

 // Explicit instantiations of kotlin::to_string for the three conversion modes.
 // Only the CHECKED instantiation is not noexcept.
 template std::string kotlin::to_string<KStringConversionMode::CHECKED>(KConstRef, size_t, size_t);
 template std::string kotlin::to_string<KStringConversionMode::UNCHECKED>(KConstRef, size_t, size_t) noexcept;
 template std::string kotlin::to_string<KStringConversionMode::REPLACE_INVALID>(KConstRef, size_t, size_t) noexcept;

 // StringBuilder.kt

 // Creates a new string of `length` units and copies the entire contents of `thiz` into it.
 // The copy keeps the encoding of `thiz` when that encoding is fixed-length, and is built
 // as UTF-16 otherwise.
 // NOTE(review): all of `thiz` is copied regardless of `length`; presumably callers pass a
 // `length` that is at least the size of `thiz` — confirm on the Kotlin side.
 extern "C" OBJ_GETTER(Kotlin_StringBuilder_unsafeStringCopy, KConstRef thiz, KInt length) {
     return encodingAware(thiz, [=](auto thiz) {
         if constexpr (isFixedLengthEncoding<thiz.encoding>) {
             RETURN_RESULT_OF(createString<thiz.encoding>, length,
                 [=](auto* out) { std::copy(thiz.begin().ptr(), thiz.end().ptr(), out); });
         } else {
             RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, length,
                 [=](auto* out) { std::copy(thiz.begin(), thiz.end(), out); });
         }
     });
 }

 // Mutable pointer to the storage unit at character `index` of `string`.
 // Constness is cast away; `index` is not range-checked.
 template <StringEncoding encoding>
 static typename StringData<encoding>::unit* unsafeWritableStringData(const StringData<encoding>& string, KInt index) {
     using Unit = typename StringData<encoding>::unit;
     auto position = string.begin() + index;
     return const_cast<Unit*>(position.ptr());
 }

 // Like encodingAware, but only invokes `impl` for encodings where isFixedLengthEncoding
 // is true; for any other encoding it calls ThrowIllegalArgumentException().
 template <typename F /* = R(FixedLengthUnitStringData<*, *>) */>
 static auto fixedLengthEncodingAware(KConstRef string, F&& impl) {
     return encodingAware(string, [&](auto string) {
         if constexpr (isFixedLengthEncoding<string.encoding>)
             return impl(string);
         else
             ThrowIllegalArgumentException();
     });
 }

 // Writes `size` UTF-16 units from `array` into the string `thizPtr` starting at `index`.
 // The string is modified in place and returned when it is UTF-16, or when every unit of
 // `array` has a zero upper byte. Otherwise a new UTF-16 string of the same length is
 // created, holding the old contents overwritten with `array` at `index`.
 // No bounds are checked here.
 static OBJ_GETTER(Kotlin_StringBuilder_unsafeStringSetUTF16Array, KRef thizPtr, KInt index, const KChar* array, size_t size) {
     if (size == 0) RETURN_OBJ(thizPtr);
     return fixedLengthEncodingAware(thizPtr, [=](auto thiz) {
         if (thiz.encoding == StringEncoding::kUTF16 || utf16StringIsLatin1(array, size)) {
             std::copy_n(array, size, unsafeWritableStringData(thiz, index));
             RETURN_OBJ(thizPtr);
         }
         RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, thiz.sizeInChars(), [=](KChar* out) {
             std::copy_n(thiz.begin(), thiz.sizeInChars(), out);
             std::copy_n(array, size, out + index);
         });
     });
 }

 // Writes the single character `value` at `index` of `thiz` by forwarding to
 // Kotlin_StringBuilder_unsafeStringSetUTF16Array with a one-element range.
 extern "C" OBJ_GETTER(Kotlin_StringBuilder_unsafeStringSetChar, KRef thiz, KInt index, KChar value) {
     RETURN_RESULT_OF(Kotlin_StringBuilder_unsafeStringSetUTF16Array, thiz, index, &value, 1);
 }

 // Writes elements [start, end) of the char array `value` into `thiz` starting at `index`
 // by forwarding to Kotlin_StringBuilder_unsafeStringSetUTF16Array.
 // No bounds or array type are checked here.
 extern "C" OBJ_GETTER(Kotlin_StringBuilder_unsafeStringSetArray, KRef thiz, KInt index, KConstRef value, KInt start, KInt end) {
     RETURN_RESULT_OF(Kotlin_StringBuilder_unsafeStringSetUTF16Array, thiz, index,
                      CharArrayAddressOfElementAt(value->array(), start), end - start);
 }

 // Writes characters [start, end) of the string `value` into the string `thizPtr` starting
 // at `index`. The destination is modified in place and returned whenever its encoding can
 // hold the data; otherwise a new UTF-16 string of the same length is created with the old
 // contents overwritten by the range. An empty range returns `thizPtr` unchanged.
 // No bounds are checked here.
 extern "C" OBJ_GETTER(Kotlin_StringBuilder_unsafeStringSetString, KRef thizPtr, KInt index, KConstRef value, KInt start, KInt end) {
     if (start == end) RETURN_OBJ(thizPtr);
     return fixedLengthEncodingAware(thizPtr, [=](auto thiz) {
         return encodingAware(value, [=](auto value) {
             auto a = value.begin() + start;
             auto out = unsafeWritableStringData(thiz, index);
             if constexpr (thiz.encoding == value.encoding) {
                 // `thiz` and `value` could be the same string, in which case the ranges may overlap.
                 memmove(out, a.ptr(), (end - start) * sizeof(*out));
                 RETURN_OBJ(thizPtr);
             } else if constexpr (thiz.encoding == StringEncoding::kUTF16) {
                 // UTF-16 destination: copy through the source's character iterator.
                 std::copy_n(a, end - start, out);
                 RETURN_OBJ(thizPtr);
             } else if (value.encoding == StringEncoding::kUTF16 && utf16StringIsLatin1(a.ptr(), end - start)) {
                 // UTF-16 source whose range has only zero upper bytes: copy the units into the destination's units.
                 std::copy_n(a.ptr(), end - start, out);
                 RETURN_OBJ(thizPtr);
             } else {
                 // The range does not fit the destination's encoding: rebuild as UTF-16.
                 RETURN_RESULT_OF(createString<StringEncoding::kUTF16>, thiz.sizeInChars(), [=](KChar* out) {
                     std::copy_n(thiz.begin(), thiz.sizeInChars(), out);
                     std::copy_n(value.begin() + start, end - start, out + index);
                 });
             }
         });
     });
 }

 // Writes the decimal representation of `value` into the string `thiz` starting at `index`
 // and returns the number of characters written. No bounds are checked here.
 extern "C" KInt Kotlin_StringBuilder_unsafeStringSetInt(KRef thiz, KInt index, KInt value) {
     // 12 bytes hold "-2147483648" plus the terminating NUL, assuming KInt is 32 bits wide.
     char cstring[12];
     auto length = std::snprintf(cstring, sizeof(cstring), "%d", value);
     RuntimeAssert(length >= 0, "This should never happen"); // may be overkill
     RuntimeAssert(static_cast<size_t>(length) < sizeof(cstring), "Unexpectedly large value"); // Can't be, but this is what snprintf is for
     encodingAware(thiz, [&](auto thiz) {
         auto from = &cstring[0];
         auto to = unsafeWritableStringData(thiz, index);
         // Moves the `length` units currently at `index` forward by `length` positions
         // before the digits overwrite them.
         // NOTE(review): the purpose of preserving these units is not evident from this
         // file — confirm against the Kotlin-side caller.
         memmove(to + length, to, length * sizeof(*to));
         while (*from) {
             *to++ = static_cast<KChar>(*from++); // always ASCII
         }
     });
     return length;
 }