| // Copyright 2022 The Pigweed Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| // use this file except in compliance with the License. You may obtain a copy of |
| // the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| // License for the specific language governing permissions and limitations under |
| // the License. |
| |
| /** Decodes and detokenizes strings from binary or Base64 input. */ |
| import { Buffer } from 'buffer'; |
| import { Frame } from 'pigweedjs/pw_hdlc'; |
| import { TokenDatabase } from './token_database'; |
| import { PrintfDecoder } from './printf_decoder'; |
| |
| const MAX_RECURSIONS = 9; |
| const BASE64CHARS = '[A-Za-z0-9+\\-\\/_]'; |
| const PATTERN = new RegExp( |
| // Base64 tokenized strings start with the prefix character ($) |
| '\\$' + |
| // Tokenized strings contain 0 or more blocks of four Base64 chars. |
| `(?:${BASE64CHARS}{4})*` + |
| // The last block of 4 chars may have one or two padding chars (=). |
| `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`, |
| 'g', |
| ); |
| const BASE10_TOKEN_REGEX = '(?<base10>[0-9]{10})'; |
| const BASE16_TOKEN_REGEX = '(?<base16>[A-Fa-f0-9]{8})'; |
| const BASE64_TOKEN_REGEX = `(?<base64>(?:${BASE64CHARS}{4})*(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?)`; |
| const NESTED_TOKEN_BASE_PREFIX = '#'; |
| const NESTED_DOMAIN_START_PREFIX = '{'; |
| const NESTED_DOMAIN_END_PREFIX = '}'; |
| const NESTED_TOKEN_PREFIX = '$'; |
| const NESTED_TOKEN_FORMATS = [ |
| BASE10_TOKEN_REGEX, |
| BASE16_TOKEN_REGEX, |
| BASE64_TOKEN_REGEX, |
| ]; |
| |
| function tokenRegex(prefix: string): RegExp { |
| const escapedPrefix = prefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // Escape special regex characters |
| return new RegExp( |
| // Tokenized strings start with the prefix character ($). |
| escapedPrefix + |
| // Optional; no domain specifier defaults to (empty) domain. |
| // Brackets ({}) specifies domain string |
| '(?<domainspec>(' + |
| NESTED_DOMAIN_START_PREFIX + |
| '(?<domain>\\s*|\\s*[a-zA-Z_:][a-zA-Z0-9_:\\s]*)' + |
| NESTED_DOMAIN_END_PREFIX + |
| '))?' + |
| // Optional; no base specifier defaults to BASE64. |
| // Hash (#) with no number specified defaults to Base-16. |
| '(?<basespec>(?<base>[0-9]*)?' + |
| NESTED_TOKEN_BASE_PREFIX + |
| ')?' + |
| // Match one of the following token formats. |
| '(' + |
| NESTED_TOKEN_FORMATS.join('|') + |
| ')', |
| 'g', |
| ); |
| } |
| |
| interface TokenAndArgs { |
| token: number; |
| args: Uint8Array; |
| } |
| |
| export class Detokenizer { |
| private database: TokenDatabase; |
| private _token_regex: RegExp; |
| private prefix: string; |
| |
| constructor(csvDatabase: string) { |
| this.prefix = NESTED_TOKEN_PREFIX; // Initialize prefix first |
| this.database = new TokenDatabase(csvDatabase); |
| this._token_regex = tokenRegex(this.prefix); // Now it's safe to use this.prefix |
| } |
| |
| /** |
| * Detokenize frame data into actual string messages using the provided |
| * token database. |
| * |
| * If the frame doesn't match any token from database, the frame will be |
| * returned as string as is. |
| */ |
| detokenize( |
| tokenizedFrame: Frame, |
| domain = '', |
| maxRecursion: number = MAX_RECURSIONS, |
| ): string { |
| return this.detokenizeUint8Array(tokenizedFrame.data, domain, maxRecursion); |
| } |
| |
| /** |
| * Detokenize uint8 into actual string messages using the provided |
| * token database. |
| * |
| * If the data doesn't match any token from database, the data will be |
| * returned as string as is. |
| */ |
| detokenizeUint8Array( |
| data: Uint8Array, |
| domain = '', |
| recursion: number = MAX_RECURSIONS, |
| ): string { |
| const decodedString = new TextDecoder().decode(data); |
| if (PATTERN.test(decodedString)) { |
| return this.detokenizeBase64String(decodedString, domain, recursion); |
| } |
| const { token, args } = this.decodeUint8Array(data); |
| // Parse arguments if this is printf-style text. |
| const formatString = this.database.get(token, domain); |
| if (!formatString) { |
| return decodedString; |
| } |
| |
| if (recursion > 0) { |
| return this.detokenizeText(formatString, recursion); |
| } |
| |
| return new PrintfDecoder().decode(String(formatString), args); |
| } |
| |
| public detokenizeBase64String( |
| base64String: string, |
| domain: string, |
| recursions: number, |
| ): string { |
| return base64String.replace(PATTERN, (base64Substring) => { |
| const { token, args } = this.decodeBase64TokenFrame(base64Substring); |
| const format = this.database.get(token, domain); |
| // Parse arguments if this is printf-style text. |
| if (format) { |
| const decodedOriginal = new PrintfDecoder().decode( |
| String(format), |
| args, |
| ); |
| // Detokenize nested Base64 tokens and their arguments. |
| if (recursions > 0) { |
| return this.detokenizeBase64String( |
| decodedOriginal, |
| // Update the domain value for nested tokens. |
| this.getDomain(base64Substring), |
| recursions - 1, |
| ); |
| } |
| return decodedOriginal; |
| } |
| return base64Substring; |
| }); |
| } |
| |
| private decodeUint8Array(data: Uint8Array): TokenAndArgs { |
| const token = new DataView(data.buffer, data.byteOffset, 4).getUint32( |
| 0, |
| true, |
| ); |
| const args = new Uint8Array(data.buffer.slice(data.byteOffset + 4)); |
| |
| return { token, args }; |
| } |
| |
| private decodeBase64TokenFrame(base64Data: string): TokenAndArgs { |
| // Remove the prefix '$' and convert from Base64. |
| const prefixRemoved = base64Data.slice(1); |
| const noBase64 = Buffer.from(prefixRemoved, 'base64').toString('binary'); |
| // Convert back to bytes and return token and arguments. |
| const bytes = noBase64.split('').map((ch) => ch.charCodeAt(0)); |
| let uIntArray = new Uint8Array(bytes); |
| // Check if there are enough bytes to create a DataView of length 4. |
| if (uIntArray.length < 4) { |
| // If there are not enough bytes, pad with zeroes. |
| const paddedArray = new Uint8Array(4); |
| paddedArray.set(uIntArray); |
| uIntArray = paddedArray; |
| } |
| const token = new DataView( |
| uIntArray.buffer, |
| uIntArray.byteOffset, |
| 4, |
| ).getUint32(0, true); |
| const args = new Uint8Array(bytes.slice(4)); |
| |
| return { token, args }; |
| } |
| |
| private detokenizeScan(match: string, ...args: any[]): string { |
| const groups = args.pop(); |
| const basespec = groups.basespec; |
| const base = groups.base; |
| let domain = groups.domain; |
| const base10 = groups.base10; |
| const base16 = groups.base16; |
| const base64 = groups.base64; |
| |
| if (domain === undefined) { |
| domain = ''; |
| } else { |
| domain = domain.replace(/\s/g, ''); |
| } |
| |
| if (base64 || basespec == undefined) { |
| // Detokenize Base64-encoded frame data into actual string messages using the |
| // provided token database. |
| // |
| // If the frame doesn't match any token from database, the frame will be |
| // returned as string as is. |
| const { token, args } = this.decodeBase64TokenFrame(match); |
| const format = this.database.get(token, domain); |
| if (format) { |
| return new PrintfDecoder().decode(String(format), args); |
| } |
| return match; |
| } |
| |
| let usedBase = base; |
| if (!usedBase) { |
| usedBase = '16'; |
| } |
| |
| let tokenString = ''; |
| if (base10) { |
| tokenString = base10; |
| } else if (base16) { |
| tokenString = base16; |
| } |
| |
| return this.detokenizeOnce(tokenString, usedBase, domain, match); |
| } |
| |
| private detokenizeOnce( |
| tokenString: string, |
| base: string, |
| domain: string, |
| match: string, |
| ): string { |
| if (!this.database) |
| return `${this.prefix}${ |
| domain.length > 0 |
| ? NESTED_DOMAIN_START_PREFIX + domain + NESTED_DOMAIN_END_PREFIX |
| : '' |
| }${base.length > 0 ? base + NESTED_TOKEN_BASE_PREFIX : ''}${tokenString}`; |
| const token = parseInt(tokenString, parseInt(base)); |
| const entries = this.database.get(token, domain); |
| if (entries) { |
| return String(entries); |
| } |
| return match; |
| } |
| |
| public detokenizeText( |
| message: string, |
| recursion: number = MAX_RECURSIONS, |
| ): string { |
| if (!this.database) { |
| return message; |
| } |
| |
| let result: string = message; |
| for (let i = 0; i < recursion - 1; i++) { |
| const newResult = result.replace(this._token_regex, (match, ...args) => { |
| return this.detokenizeScan(match, ...args); |
| }); |
| |
| if (newResult === result) { |
| return result; |
| } |
| result = newResult; |
| } |
| return result; |
| } |
| |
| private getDomain(base64String: string): string { |
| const result = base64String.replace(this._token_regex, (_, ...args) => { |
| const groups = args.pop(); |
| let domain = groups.domain; |
| if (domain === undefined) { |
| domain = ''; |
| } else { |
| domain = domain.replace(/\s/g, ''); |
| } |
| return domain; |
| }); |
| return result; |
| } |
| } |