| /* |
| * Copyright 2010-2017 JetBrains s.r.o. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <cstdio> |
| #include <cstdlib> |
| #include <limits> |
| #include <string.h> |
| #include <string> |
| |
| #include "KAssert.h" |
| #include "Exceptions.h" |
| #include "Memory.h" |
| #include "Natives.h" |
| #include "KString.h" |
| #include "Porting.h" |
| #include "Types.h" |
| |
| #include "utf8.h" |
| |
| #include "polyhash/PolyHash.h" |
| |
| using namespace kotlin; |
| |
| namespace { |
| |
// Largest string length (in UTF-16 code units) accepted by this file:
// the maximum int32_t value, stored as an unsigned 32-bit quantity.
constexpr uint32_t MAX_STRING_SIZE{static_cast<uint32_t>(std::numeric_limits<int32_t>::max())};
| |
| KChar* StringUtf16Data(ObjHeader* kstring) { |
| return reinterpret_cast<KChar*>(StringRawData(kstring)); |
| } |
| |
| const KChar* StringUtf16Data(KConstRef kstring) { |
| return reinterpret_cast<const KChar*>(StringRawData(kstring)); |
| } |
| |
| size_t StringUtf16Length(KConstRef kstring) { |
| return kstring->array()->count_; |
| } |
| |
| template <typename CharCountF /*= uint32_t(const char*, const char*) */, typename ConvertF /*= void(const char*, const char*, KChar*) */> |
| OBJ_GETTER(convertToUTF16, const char* rawString, size_t rawStringLength, CharCountF&& countChars, ConvertF&& convert) { |
| if (rawString == nullptr) RETURN_OBJ(nullptr); |
| if (rawStringLength == 0) RETURN_RESULT_OF0(TheEmptyString); |
| |
| auto rawStringEnd = rawString + rawStringLength; |
| auto result = CreateUninitializedUtf16String(countChars(rawString, rawStringEnd), OBJ_RESULT); |
| convert(rawString, rawStringEnd, StringUtf16Data(result)); |
| RETURN_OBJ(result); |
| } |
| |
| template <KStringConversionMode mode> |
| OBJ_GETTER(unsafeConvertToUTF8, KConstRef thiz, KInt start, KInt size) { |
| RuntimeAssert(thiz->type_info() == theStringTypeInfo, "Must use String"); |
| |
| std::string utf8; |
| try { |
| utf8 = kotlin::to_string<mode>(thiz, static_cast<size_t>(start), static_cast<size_t>(size)); |
| } catch (...) { |
| ThrowCharacterCodingException(); |
| } |
| |
| ArrayHeader* result = AllocArrayInstance(theByteArrayTypeInfo, utf8.size(), OBJ_RESULT)->array(); |
| ::memcpy(ByteArrayAddressOfElementAt(result, 0), utf8.data(), utf8.size()); |
| RETURN_OBJ(result->obj()); |
| } |
| |
// Returns the index of the first position at which `first` and `second` differ,
// or `size` when the first `size` elements of both ranges are equal.
//
// The ranges are compared one `long` at a time to skip over the common prefix
// quickly; the element-wise loop then pinpoints the exact mismatch (or handles
// the tail that does not fill a whole `long`).
//
// The wide loads go through memcpy rather than through a `const long*` obtained
// by reinterpret_cast: reading uint16_t storage via a `long` lvalue violates the
// aliasing rules and assumes `long` alignment. memcpy has neither problem and
// compilers lower it to a single load.
uint32_t mismatch(const uint16_t* first, const uint16_t* second, uint32_t size) {
    constexpr uint32_t step = sizeof(long) / sizeof(uint16_t);
    const uint32_t wordCount = size / step;

    uint32_t word = 0;
    for (; word < wordCount; ++word) {
        long lhs;
        long rhs;
        ::memcpy(&lhs, first + word * step, sizeof(long));
        ::memcpy(&rhs, second + word * step, sizeof(long));
        if (lhs != rhs) {
            break;
        }
    }

    // Resume at the start of the first differing word (or at the tail).
    for (uint32_t i = word * step; i < size; ++i) {
        if (first[i] != second[i]) {
            return i;
        }
    }
    return size;
}
| |
| const char* unsafeGetByteArrayData(KConstRef thiz, KInt start) { |
| RuntimeAssert(thiz->type_info() == theByteArrayTypeInfo, "Must use a byte array"); |
| return reinterpret_cast<const char*>(ByteArrayAddressOfElementAt(thiz->array(), start)); |
| } |
| |
// Three-way comparison: returns 0 when a == b, -1 when a < b, and 1 otherwise.
template <typename T>
int threeWayCompare(T a, T b) {
    if (a == b) {
        return 0;
    }
    return a < b ? -1 : 1;
}
| |
| PERFORMANCE_INLINE inline const KChar* boundsCheckedIteratorAt(KConstRef string, KInt index) { |
| // We couldn't have created a string bigger than max KInt value. |
| // So if index is < 0, conversion to an unsigned value would make it bigger |
| // than the array size. |
| if (static_cast<uint32_t>(index) >= StringUtf16Length(string)) { |
| ThrowArrayIndexOutOfBoundsException(); |
| } |
| return StringUtf16Data(string) + index; |
| } |
| |
#if KONAN_WINDOWS
// Fallback for builds where KONAN_WINDOWS is set: naive search for the first
// occurrence of the `littleLen`-byte block `little` inside the `bigLen`-byte
// block `big`. Returns a pointer to the match, or nullptr when there is none.
void* memmem(const void *big, size_t bigLen, const void *little, size_t littleLen) {
    auto* haystack = static_cast<char*>(const_cast<void*>(big));
    for (size_t offset = 0; offset + littleLen <= bigLen; ++offset) {
        char* candidate = haystack + offset;
        if (memcmp(little, candidate, littleLen) == 0) {
            return candidate;
        }
    }
    return nullptr;
}
#endif
| |
| } // namespace |
| |
| extern "C" OBJ_GETTER(CreateStringFromCString, const char* cstring) { |
| RETURN_RESULT_OF(CreateStringFromUtf8, cstring, cstring ? strlen(cstring) : 0); |
| } |
| |
| extern "C" OBJ_GETTER(CreateStringFromUtf8, const char* utf8, uint32_t lengthBytes) { |
| RETURN_RESULT_OF(convertToUTF16, utf8, lengthBytes, |
| [](auto data, auto end) { return utf8::with_replacement::utf16_length(data, end); }, |
| [](auto data, auto end, auto out) { utf8::with_replacement::utf8to16(data, end, out); }); |
| } |
| |
| extern "C" OBJ_GETTER(CreateStringFromUtf8OrThrow, const char* utf8, uint32_t lengthBytes) { |
| RETURN_RESULT_OF(convertToUTF16, utf8, lengthBytes, |
| [](const char* data, const char* end) { |
| try { |
| return utf8::utf16_length(data, end); |
| } catch (...) { |
| ThrowCharacterCodingException(); |
| } |
| }, |
| [](auto data, auto end, auto out) { utf8::unchecked::utf8to16(data, end, out); }); |
| } |
| |
| extern "C" OBJ_GETTER(CreateStringFromUtf16, const KChar* utf16, uint32_t lengthChars) { |
| if (utf16 == nullptr) RETURN_OBJ(nullptr); |
| if (lengthChars == 0) RETURN_RESULT_OF0(TheEmptyString); |
| |
| auto result = CreateUninitializedUtf16String(lengthChars, OBJ_RESULT); |
| memcpy(StringRawData(result), utf16, StringRawSize(result)); |
| RETURN_OBJ(result); |
| } |
| |
| extern "C" OBJ_GETTER(CreateUninitializedUtf16String, uint32_t lengthChars) { |
| RETURN_RESULT_OF(AllocArrayInstance, theStringTypeInfo, lengthChars); |
| } |
| |
| extern "C" char* CreateCStringFromString(KConstRef kref) { |
| if (kref == nullptr) return nullptr; |
| std::string utf8 = kotlin::to_string<KStringConversionMode::UNCHECKED>(kref); |
| char* result = reinterpret_cast<char*>(std::calloc(1, utf8.size() + 1)); |
| ::memcpy(result, utf8.data(), utf8.size()); |
| return result; |
| } |
| |
// Releases `cstring` with std::free; a null pointer is ignored.
extern "C" void DisposeCString(char* cstring) {
    if (cstring == nullptr) {
        return;
    }
    std::free(cstring);
}
| |
| extern "C" KRef CreatePermanentStringFromCString(const char* nullTerminatedUTF8) { |
| // Note: this function can be called in "Native" thread state. But this is fine: |
| // while it indeed manipulates Kotlin objects, it doesn't in fact access _Kotlin heap_, |
| // because the accessed object is off-heap, imitating permanent static objects. |
| const char* end = nullTerminatedUTF8 + strlen(nullTerminatedUTF8); |
| size_t count = utf8::with_replacement::utf16_length(nullTerminatedUTF8, end); |
| size_t headerSize = alignUp(sizeof(ArrayHeader), alignof(char16_t)); |
| size_t arraySize = headerSize + count * sizeof(char16_t); |
| |
| auto header = (ObjHeader*)std::calloc(arraySize, 1); |
| header->typeInfoOrMeta_ = setPointerBits((TypeInfo *)theStringTypeInfo, OBJECT_TAG_PERMANENT_CONTAINER); |
| header->array()->count_ = count; |
| utf8::with_replacement::utf8to16(nullTerminatedUTF8, end, StringUtf16Data(header)); |
| return header; |
| } |
| |
| extern "C" void FreePermanentStringForTests(KConstRef header) { |
| std::free(const_cast<KRef>(header)); |
| } |
| |
| // String.kt |
| extern "C" KInt Kotlin_String_getStringLength(KConstRef thiz) { |
| return StringUtf16Length(thiz); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_replace, KConstRef thiz, KChar oldChar, KChar newChar) { |
| auto count = StringUtf16Length(thiz); |
| auto result = CreateUninitializedUtf16String(count, OBJ_RESULT); |
| auto resultRaw = StringUtf16Data(result); |
| for (auto it = StringUtf16Data(thiz), end = it + count; it != end; it++) { |
| KChar thizChar = *it; |
| *resultRaw++ = thizChar == oldChar ? newChar : thizChar; |
| } |
| RETURN_OBJ(result); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_plusImpl, KConstRef thiz, KConstRef other) { |
| RuntimeAssert(thiz != nullptr, "this cannot be null"); |
| RuntimeAssert(other != nullptr, "other cannot be null"); |
| RuntimeAssert(thiz->type_info() == theStringTypeInfo, "Must be a string"); |
| RuntimeAssert(other->type_info() == theStringTypeInfo, "Must be a string"); |
| |
| auto thizLength = StringUtf16Length(thiz); |
| auto otherLength = StringUtf16Length(other); |
| RuntimeAssert(thizLength <= MAX_STRING_SIZE, "this cannot be this large"); |
| RuntimeAssert(otherLength <= MAX_STRING_SIZE, "other cannot be this large"); |
| auto resultLength = thizLength + otherLength; // can't overflow since MAX_STRING_SIZE is (max value)/2 |
| if (resultLength > MAX_STRING_SIZE) { |
| ThrowOutOfMemoryError(); |
| } |
| |
| auto result = CreateUninitializedUtf16String(resultLength, OBJ_RESULT); |
| auto resultRaw = StringUtf16Data(result); |
| memcpy(resultRaw, StringUtf16Data(thiz), StringRawSize(thiz)); |
| memcpy(resultRaw + thizLength, StringUtf16Data(other), StringRawSize(other)); |
| RETURN_OBJ(result); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_unsafeStringFromCharArray, KConstRef thiz, KInt start, KInt size) { |
| RuntimeAssert(thiz->type_info() == theCharArrayTypeInfo, "Must use a char array"); |
| |
| if (size == 0) { |
| RETURN_RESULT_OF0(TheEmptyString); |
| } |
| |
| auto result = CreateUninitializedUtf16String(size, OBJ_RESULT); |
| memcpy(StringRawData(result), CharArrayAddressOfElementAt(thiz->array(), start), size * sizeof(KChar)); |
| RETURN_OBJ(result); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_toCharArray, KConstRef string, KRef destination, KInt destinationOffset, KInt start, KInt size) { |
| memcpy(CharArrayAddressOfElementAt(destination->array(), destinationOffset), |
| StringUtf16Data(string) + start, size * sizeof(KChar)); |
| RETURN_OBJ(destination); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_subSequence, KConstRef thiz, KInt startIndex, KInt endIndex) { |
| if (startIndex < 0 || static_cast<uint32_t>(endIndex) > StringUtf16Length(thiz) || startIndex > endIndex) { |
| // TODO: is it correct exception? |
| ThrowArrayIndexOutOfBoundsException(); |
| } |
| |
| if (startIndex == endIndex) { |
| RETURN_RESULT_OF0(TheEmptyString); |
| } |
| |
| KInt length = endIndex - startIndex; |
| auto result = CreateUninitializedUtf16String(length, OBJ_RESULT); |
| memcpy(StringUtf16Data(result), StringUtf16Data(thiz) + startIndex, length * sizeof(KChar)); |
| RETURN_OBJ(result); |
| } |
| |
| extern "C" KInt Kotlin_String_compareTo(KConstRef thiz, KConstRef other) { |
| auto first = StringUtf16Data(thiz); |
| auto firstSize = StringUtf16Length(thiz); |
| auto second = StringUtf16Data(other); |
| auto secondSize = StringUtf16Length(other); |
| auto minSize = std::min(firstSize, secondSize); |
| auto mismatch_position = mismatch(first, second, minSize); |
| if (mismatch_position != minSize) { |
| return threeWayCompare(first[mismatch_position], second[mismatch_position]); |
| } |
| return threeWayCompare(firstSize, secondSize); |
| } |
| |
| extern "C" KChar Kotlin_String_get(KConstRef thiz, KInt index) { |
| return *boundsCheckedIteratorAt(thiz, index); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_ByteArray_unsafeStringFromUtf8OrThrow, KConstRef thiz, KInt start, KInt size) { |
| RETURN_RESULT_OF(CreateStringFromUtf8OrThrow, unsafeGetByteArrayData(thiz, start), size); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_ByteArray_unsafeStringFromUtf8, KConstRef thiz, KInt start, KInt size) { |
| RETURN_RESULT_OF(CreateStringFromUtf8, unsafeGetByteArrayData(thiz, start), size); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_unsafeStringToUtf8, KConstRef thiz, KInt start, KInt size) { |
| RETURN_RESULT_OF(unsafeConvertToUTF8<KStringConversionMode::REPLACE_INVALID>, thiz, start, size); |
| } |
| |
| extern "C" OBJ_GETTER(Kotlin_String_unsafeStringToUtf8OrThrow, KConstRef thiz, KInt start, KInt size) { |
| RETURN_RESULT_OF(unsafeConvertToUTF8<KStringConversionMode::CHECKED>, thiz, start, size); |
| } |
| |
| extern "C" KInt Kotlin_StringBuilder_insertString(KRef builder, KInt distIndex, KConstRef fromString, KInt sourceIndex, KInt count) { |
| auto toArray = builder->array(); |
| RuntimeAssert(sourceIndex >= 0 && static_cast<uint32_t>(sourceIndex + count) <= StringUtf16Length(fromString), "must be true"); |
| RuntimeAssert(distIndex >= 0 && static_cast<uint32_t>(distIndex + count) <= toArray->count_, "must be true"); |
| memcpy(CharArrayAddressOfElementAt(toArray, distIndex), StringUtf16Data(fromString) + sourceIndex, count * sizeof(KChar)); |
| return count; |
| } |
| |
| extern "C" KInt Kotlin_StringBuilder_insertInt(KRef builder, KInt position, KInt value) { |
| auto toArray = builder->array(); |
| RuntimeAssert(toArray->count_ >= static_cast<uint32_t>(11 + position), "must be true"); |
| char cstring[12]; |
| auto length = std::snprintf(cstring, sizeof(cstring), "%d", value); |
| RuntimeAssert(length >= 0, "This should never happen"); // may be overkill |
| RuntimeAssert(static_cast<size_t>(length) < sizeof(cstring), "Unexpectedly large value"); // Can't be, but this is what sNprintf for |
| auto* from = &cstring[0]; |
| auto* to = CharArrayAddressOfElementAt(toArray, position); |
| while (*from) { |
| *to++ = *from++; |
| } |
| return from - cstring; |
| } |
| |
| |
| extern "C" KBoolean Kotlin_String_equals(KConstRef thiz, KConstRef other) { |
| if (other == nullptr || other->type_info() != theStringTypeInfo) return false; |
| if (thiz == other) return true; |
| // TODO: this assumes identical encodings |
| return StringRawSize(thiz) == StringRawSize(other) && |
| memcmp(StringRawData(thiz), StringRawData(other), StringRawSize(thiz)) == 0; |
| } |
| |
| // Bounds checks is are performed on Kotlin side |
| extern "C" KBoolean Kotlin_String_unsafeRangeEquals(KConstRef thiz, KInt thizOffset, KConstRef other, KInt otherOffset, KInt length) { |
| return memcmp(StringUtf16Data(thiz) + thizOffset, StringUtf16Data(other) + otherOffset, length * sizeof(KChar)) == 0; |
| } |
| |
| extern "C" KBoolean Kotlin_Char_isISOControl(KChar ch) { |
| return (ch <= 0x1F) || (ch >= 0x7F && ch <= 0x9F); |
| } |
| |
| extern "C" KBoolean Kotlin_Char_isHighSurrogate(KChar ch) { |
| return ((ch & 0xfc00) == 0xd800); |
| } |
| |
| extern "C" KBoolean Kotlin_Char_isLowSurrogate(KChar ch) { |
| return ((ch & 0xfc00) == 0xdc00); |
| } |
| |
| extern "C" KInt Kotlin_String_indexOfChar(KConstRef thiz, KChar ch, KInt fromIndex) { |
| if (fromIndex < 0) { |
| fromIndex = 0; |
| } |
| KInt count = Kotlin_String_getStringLength(thiz); |
| if (static_cast<uint32_t>(fromIndex) > static_cast<uint32_t>(count)) { |
| return -1; |
| } |
| auto thizRaw = StringUtf16Data(thiz) + fromIndex; |
| while (fromIndex < count) { |
| if (*thizRaw++ == ch) return fromIndex; |
| fromIndex++; |
| } |
| return -1; |
| } |
| |
| extern "C" KInt Kotlin_String_lastIndexOfChar(KConstRef thiz, KChar ch, KInt fromIndex) { |
| auto length = static_cast<uint32_t>(Kotlin_String_getStringLength(thiz)); |
| if (fromIndex < 0 || length == 0) { |
| return -1; |
| } |
| if (static_cast<uint32_t>(fromIndex) >= length) { |
| fromIndex = length - 1; |
| } |
| KInt index = fromIndex; |
| const KChar* thizRaw = StringUtf16Data(thiz) + index; |
| while (index >= 0) { |
| if (*thizRaw-- == ch) return index; |
| index--; |
| } |
| return -1; |
| } |
| |
| // TODO: or code up Knuth-Moris-Pratt, |
| // or use std::search with std::boyer_moore_searcher: |
| // https://en.cppreference.com/w/cpp/algorithm/search |
| extern "C" KInt Kotlin_String_indexOfString(KConstRef thiz, KConstRef other, KInt fromIndex) { |
| if (fromIndex < 0) { |
| fromIndex = 0; |
| } |
| KInt thizLength = Kotlin_String_getStringLength(thiz); |
| KInt otherLength = Kotlin_String_getStringLength(other); |
| if (static_cast<uint32_t>(fromIndex) >= static_cast<uint32_t>(thizLength)) { |
| return otherLength == 0 ? thizLength : -1; |
| } |
| if (otherLength > thizLength - fromIndex) { |
| return -1; |
| } |
| // An empty string can be always found. |
| if (otherLength == 0) { |
| return fromIndex; |
| } |
| |
| auto thizRaw = StringUtf16Data(thiz); |
| auto otherRaw = StringUtf16Data(other); |
| auto otherRawSize = StringRawSize(other); |
| while (true) { |
| void* result = memmem(thizRaw + fromIndex, (thizLength - fromIndex) * sizeof(KChar), |
| otherRaw, otherRawSize); |
| if (result == nullptr) return -1; |
| auto byteIndex = reinterpret_cast<intptr_t>(result) - reinterpret_cast<intptr_t>(thizRaw); |
| if (byteIndex % sizeof(KChar) == 0) { |
| return byteIndex / sizeof(KChar); |
| } else { |
| fromIndex = byteIndex / sizeof(KChar) + 1; |
| } |
| } |
| } |
| |
| extern "C" KInt Kotlin_String_lastIndexOfString(KConstRef thiz, KConstRef other, KInt fromIndex) { |
| KInt count = Kotlin_String_getStringLength(thiz); |
| KInt otherCount = Kotlin_String_getStringLength(other); |
| |
| if (fromIndex < 0 || otherCount > count) { |
| return -1; |
| } |
| if (otherCount == 0) { |
| return fromIndex < count ? fromIndex : count; |
| } |
| |
| KInt start = std::min(fromIndex, count - otherCount); |
| KChar firstChar = Kotlin_String_get(other, 0); |
| while (true) { |
| KInt candidate = Kotlin_String_lastIndexOfChar(thiz, firstChar, start); |
| if (candidate == -1) return -1; |
| if (memcmp(StringUtf16Data(thiz) + candidate, StringUtf16Data(other), otherCount * sizeof(KChar)) == 0) { |
| return candidate; |
| } |
| start = candidate - 1; |
| } |
| } |
| |
| extern "C" KInt Kotlin_String_hashCode(KConstRef thiz) { |
| // TODO: consider caching strings hashes. |
| return polyHash(StringUtf16Length(thiz), StringUtf16Data(thiz)); |
| } |
| |
| extern "C" const KChar* Kotlin_String_utf16pointer(KConstRef message) { |
| RuntimeAssert(message->type_info() == theStringTypeInfo, "Must use a string"); |
| return StringUtf16Data(message); |
| } |
| |
| extern "C" KInt Kotlin_String_utf16length(KConstRef message) { |
| RuntimeAssert(message->type_info() == theStringTypeInfo, "Must use a string"); |
| return StringRawSize(message); |
| } |
| |
| extern "C" KConstNativePtr Kotlin_Arrays_getStringAddressOfElement(KConstRef thiz, KInt index) { |
| return reinterpret_cast<KConstNativePtr>(boundsCheckedIteratorAt(thiz, index)); |
| } |
| |
| template <KStringConversionMode mode> |
| std::string kotlin::to_string(KConstRef kstring, size_t start, size_t size) noexcept(mode != KStringConversionMode::CHECKED) { |
| RuntimeAssert(kstring->type_info() == theStringTypeInfo, "A Kotlin String expected"); |
| auto length = StringUtf16Length(kstring); |
| RuntimeAssert(start <= length, "start index out of bounds"); |
| auto utf16 = StringUtf16Data(kstring) + start; |
| if (size == std::string::npos) { |
| size = length - start; |
| } else { |
| RuntimeAssert(size <= length - start, "size out of bounds"); |
| } |
| std::string utf8; |
| utf8.reserve(size); |
| switch (mode) { |
| case KStringConversionMode::UNCHECKED: |
| utf8::unchecked::utf16to8(utf16, utf16 + size, back_inserter(utf8)); |
| break; |
| case KStringConversionMode::CHECKED: |
| utf8::utf16to8(utf16, utf16 + size, back_inserter(utf8)); |
| break; |
| case KStringConversionMode::REPLACE_INVALID: |
| utf8::with_replacement::utf16to8(utf16, utf16 + size, back_inserter(utf8)); |
| break; |
| } |
| return utf8; |
| } |
| |
| template std::string kotlin::to_string<KStringConversionMode::CHECKED>(KConstRef, size_t, size_t); |
| template std::string kotlin::to_string<KStringConversionMode::UNCHECKED>(KConstRef, size_t, size_t) noexcept; |
| template std::string kotlin::to_string<KStringConversionMode::REPLACE_INVALID>(KConstRef, size_t, size_t) noexcept; |