[experiment] Direct encoding into UTF-8 bytes, without the 8-to-7-bit conversion
diff --git a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java index 9db1c7d..d1e545b 100644 --- a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java +++ b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java
@@ -22,6 +22,8 @@ import java.util.List; public class BitEncoding { + private static boolean NEW = true; + private BitEncoding() { } @@ -34,6 +36,10 @@ */ @NotNull public static String[] encodeBytes(@NotNull byte[] data) { + if (NEW) { + List<String> strings = UtfEncodingKt.bytesToStrings(data); + return strings.toArray(new String[strings.size()]); + } byte[] bytes = encode8to7(data); // Since 0x0 byte is encoded as two bytes in the Modified UTF-8 (0xc0 0x80) and zero is rather common to byte arrays, we increment // every byte by one modulo max byte value, so that the less common value 0x7f will be represented as two bytes instead. @@ -157,6 +163,9 @@ */ @NotNull public static byte[] decodeBytes(@NotNull String[] data) { + if (NEW) { + return UtfEncodingKt.stringsToBytes(data); + } byte[] bytes = combineStringArrayIntoBytes(data); // Adding 0x7f modulo max byte value is equivalent to subtracting 1 the same modulo, which is inverse to what happens in encodeBytes addModuloByte(bytes, 0x7f);
diff --git a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt new file mode 100644 index 0000000..b18abc9 --- /dev/null +++ b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2010-2016 JetBrains s.r.o.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.jetbrains.kotlin.serialization.jvm
+
+import java.util.*
+
+// The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7
+private val MAX_UTF8_INFO_LENGTH = 65535
+
+// Largest char code that stands for a data byte verbatim (the unsigned value of the byte)
+private val MAX_BYTE_CHAR_CODE = 0xFF
+
+// Masks needed only to read the legacy representation, see stringsToBytes
+private val TWO_LOWER_BITS_MASK = 0b00000011
+private val SIX_LOWER_BITS_MASK = 0b00111111
+
+/**
+ * Converts [bytes] into strings in which every char stands for exactly one input byte: the char code is the unsigned value
+ * of the byte (0..255).
+ *
+ * In the Modified UTF-8 form used by CONSTANT_Utf8_info (JVMS 4.4.7), chars 0x01..0x7F take one byte, while 0x00 and
+ * 0x80..0xFF take two. A new string is started whenever the next char would push the current one past
+ * [MAX_UTF8_INFO_LENGTH] bytes, so every returned string fits into a single constant pool entry.
+ *
+ * @return the chunks in order, empty if [bytes] is empty; [stringsToBytes] converts them back
+ */
+fun bytesToStrings(bytes: ByteArray): List<String> {
+    val result = ArrayList<String>(1)
+    val buffer = StringBuilder()
+    var bytesInBuffer = 0
+
+    for (b in bytes) {
+        // Only 0x01..0x7F are single-byte in Modified UTF-8: zero is written as 0xC0 0x80, and 0x80..0xFF need a leading byte
+        val encodedLength = if (b > 0) 1 else 2
+
+        if (bytesInBuffer + encodedLength > MAX_UTF8_INFO_LENGTH) {
+            result.add(buffer.toString())
+            buffer.setLength(0)
+            bytesInBuffer = 0
+        }
+
+        // The char is the code point itself; producing the actual UTF-8 bytes is left to whoever writes the string out
+        buffer.append((b.toInt() and 0xFF).toChar())
+        bytesInBuffer += encodedLength
+    }
+
+    if (!buffer.isEmpty()) {
+        result.add(buffer.toString())
+    }
+
+    return result
+}
+
+/**
+ * Restores the byte array encoded by [bytesToStrings]: each char of each string yields exactly one byte.
+ *
+ * Chars in 0..0xFF carry the byte value directly. Larger chars are read in the legacy form, where a byte >= 0x80 was stored
+ * as its two-byte UTF-8 sequence packed into one char (0xC2/0xC3 in the upper half, the continuation byte in the lower
+ * half), so strings written in that form still decode to the same bytes.
+ */
+fun stringsToBytes(strings: Array<String>): ByteArray {
+    val resultLength = strings.sumBy { it.length }
+    val result = ByteArray(resultLength)
+
+    var i = 0
+    for (s in strings) {
+        for (c in s) {
+            val code = c.toInt()
+            if (code <= MAX_BYTE_CHAR_CODE) {
+                result[i++] = code.toByte()
+            }
+            else {
+                val higherBits = ((code shr 8) and TWO_LOWER_BITS_MASK) shl 6
+                val lowerBits = code and SIX_LOWER_BITS_MASK
+                result[i++] = (higherBits or lowerBits).toByte()
+            }
+        }
+    }
+
+    return result
+}
\ No newline at end of file