| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // https://developers.google.com/protocol-buffers/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // from google3/strings/strutil.cc |
| |
| #include <google/protobuf/stubs/strutil.h> |
| #include <errno.h> |
| #include <float.h> // FLT_DIG and DBL_DIG |
| #include <limits> |
| #include <limits.h> |
| #include <stdio.h> |
| #include <iterator> |
| |
| #ifdef _WIN32 |
| // MSVC has only _snprintf, not snprintf. |
| // |
| // MinGW has both snprintf and _snprintf, but they appear to be different |
| // functions. The former is buggy. When invoked like so: |
| // char buffer[32]; |
| // snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f); |
| // it prints "1.23000e+10". This is plainly wrong: %g should never print |
| // trailing zeros after the decimal point. For some reason this bug only |
| // occurs with some input values, not all. In any case, _snprintf does the |
| // right thing, so we use it. |
| #define snprintf _snprintf |
| #endif |
| |
| namespace google { |
| namespace protobuf { |
| |
| inline bool IsNaN(double value) { |
| // NaN is never equal to anything, even itself. |
| return value != value; |
| } |
| |
| // These are defined as macros on some platforms. #undef them so that we can |
| // redefine them. |
| #undef isxdigit |
| #undef isprint |
| |
| // The definitions of these in ctype.h change based on locale. Since our |
| // string manipulation is all in relation to the protocol buffer and C++ |
| // languages, we always want to use the C locale. So, we re-define these |
| // exactly as we want them. |
| inline bool isxdigit(char c) { |
| return ('0' <= c && c <= '9') || |
| ('a' <= c && c <= 'f') || |
| ('A' <= c && c <= 'F'); |
| } |
| |
| inline bool isprint(char c) { |
| return c >= 0x20 && c <= 0x7E; |
| } |
| |
| // ---------------------------------------------------------------------- |
| // StripString |
| // Replaces any occurrence of the character 'remove' (or the characters |
| // in 'remove') with the character 'replacewith'. |
| // ---------------------------------------------------------------------- |
| void StripString(string* s, const char* remove, char replacewith) { |
| const char * str_start = s->c_str(); |
| const char * str = str_start; |
| for (str = strpbrk(str, remove); |
| str != NULL; |
| str = strpbrk(str + 1, remove)) { |
| (*s)[str - str_start] = replacewith; |
| } |
| } |
| |
| void StripWhitespace(string* str) { |
| int str_length = str->length(); |
| |
| // Strip off leading whitespace. |
| int first = 0; |
| while (first < str_length && ascii_isspace(str->at(first))) { |
| ++first; |
| } |
| // If entire string is white space. |
| if (first == str_length) { |
| str->clear(); |
| return; |
| } |
| if (first > 0) { |
| str->erase(0, first); |
| str_length -= first; |
| } |
| |
| // Strip off trailing whitespace. |
| int last = str_length - 1; |
| while (last >= 0 && ascii_isspace(str->at(last))) { |
| --last; |
| } |
| if (last != (str_length - 1) && last >= 0) { |
| str->erase(last + 1, string::npos); |
| } |
| } |
| |
| // ---------------------------------------------------------------------- |
| // StringReplace() |
| // Replace the "old" pattern with the "new" pattern in a string, |
| // and append the result to "res". If replace_all is false, |
| // it only replaces the first instance of "old." |
| // ---------------------------------------------------------------------- |
| |
| void StringReplace(const string& s, const string& oldsub, |
| const string& newsub, bool replace_all, |
| string* res) { |
| if (oldsub.empty()) { |
| res->append(s); // if empty, append the given string. |
| return; |
| } |
| |
| string::size_type start_pos = 0; |
| string::size_type pos; |
| do { |
| pos = s.find(oldsub, start_pos); |
| if (pos == string::npos) { |
| break; |
| } |
| res->append(s, start_pos, pos - start_pos); |
| res->append(newsub); |
| start_pos = pos + oldsub.size(); // start searching again after the "old" |
| } while (replace_all); |
| res->append(s, start_pos, s.length() - start_pos); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // StringReplace() |
| // Give me a string and two patterns "old" and "new", and I replace |
| // the first instance of "old" in the string with "new", if it |
| // exists. If "global" is true; call this repeatedly until it |
| // fails. RETURN a new string, regardless of whether the replacement |
| // happened or not. |
| // ---------------------------------------------------------------------- |
| |
| string StringReplace(const string& s, const string& oldsub, |
| const string& newsub, bool replace_all) { |
| string ret; |
| StringReplace(s, oldsub, newsub, replace_all, &ret); |
| return ret; |
| } |
| |
| // ---------------------------------------------------------------------- |
| // SplitStringUsing() |
| // Split a string using a character delimiter. Append the components |
| // to 'result'. |
| // |
| // Note: For multi-character delimiters, this routine will split on *ANY* of |
| // the characters in the string, not the entire string as a single delimiter. |
| // ---------------------------------------------------------------------- |
| template <typename ITR> |
| static inline |
| void SplitStringToIteratorUsing(const string& full, |
| const char* delim, |
| ITR& result) { |
| // Optimize the common case where delim is a single character. |
| if (delim[0] != '\0' && delim[1] == '\0') { |
| char c = delim[0]; |
| const char* p = full.data(); |
| const char* end = p + full.size(); |
| while (p != end) { |
| if (*p == c) { |
| ++p; |
| } else { |
| const char* start = p; |
| while (++p != end && *p != c); |
| *result++ = string(start, p - start); |
| } |
| } |
| return; |
| } |
| |
| string::size_type begin_index, end_index; |
| begin_index = full.find_first_not_of(delim); |
| while (begin_index != string::npos) { |
| end_index = full.find_first_of(delim, begin_index); |
| if (end_index == string::npos) { |
| *result++ = full.substr(begin_index); |
| return; |
| } |
| *result++ = full.substr(begin_index, (end_index - begin_index)); |
| begin_index = full.find_first_not_of(delim, end_index); |
| } |
| } |
| |
| void SplitStringUsing(const string& full, |
| const char* delim, |
| vector<string>* result) { |
| back_insert_iterator< vector<string> > it(*result); |
| SplitStringToIteratorUsing(full, delim, it); |
| } |
| |
| // Split a string using a character delimiter. Append the components |
| // to 'result'. If there are consecutive delimiters, this function |
| // will return corresponding empty strings. The string is split into |
| // at most the specified number of pieces greedily. This means that the |
| // last piece may possibly be split further. To split into as many pieces |
| // as possible, specify 0 as the number of pieces. |
| // |
| // If "full" is the empty string, yields an empty string as the only value. |
| // |
| // If "pieces" is negative for some reason, it returns the whole string |
| // ---------------------------------------------------------------------- |
| template <typename StringType, typename ITR> |
| static inline |
| void SplitStringToIteratorAllowEmpty(const StringType& full, |
| const char* delim, |
| int pieces, |
| ITR& result) { |
| string::size_type begin_index, end_index; |
| begin_index = 0; |
| |
| for (int i = 0; (i < pieces-1) || (pieces == 0); i++) { |
| end_index = full.find_first_of(delim, begin_index); |
| if (end_index == string::npos) { |
| *result++ = full.substr(begin_index); |
| return; |
| } |
| *result++ = full.substr(begin_index, (end_index - begin_index)); |
| begin_index = end_index + 1; |
| } |
| *result++ = full.substr(begin_index); |
| } |
| |
| void SplitStringAllowEmpty(const string& full, const char* delim, |
| vector<string>* result) { |
| back_insert_iterator<vector<string> > it(*result); |
| SplitStringToIteratorAllowEmpty(full, delim, 0, it); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // JoinStrings() |
| // This merges a vector of string components with delim inserted |
| // as separaters between components. |
| // |
| // ---------------------------------------------------------------------- |
| template <class ITERATOR> |
| static void JoinStringsIterator(const ITERATOR& start, |
| const ITERATOR& end, |
| const char* delim, |
| string* result) { |
| GOOGLE_CHECK(result != NULL); |
| result->clear(); |
| int delim_length = strlen(delim); |
| |
| // Precompute resulting length so we can reserve() memory in one shot. |
| int length = 0; |
| for (ITERATOR iter = start; iter != end; ++iter) { |
| if (iter != start) { |
| length += delim_length; |
| } |
| length += iter->size(); |
| } |
| result->reserve(length); |
| |
| // Now combine everything. |
| for (ITERATOR iter = start; iter != end; ++iter) { |
| if (iter != start) { |
| result->append(delim, delim_length); |
| } |
| result->append(iter->data(), iter->size()); |
| } |
| } |
| |
| void JoinStrings(const vector<string>& components, |
| const char* delim, |
| string * result) { |
| JoinStringsIterator(components.begin(), components.end(), delim, result); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // UnescapeCEscapeSequences() |
| // This does all the unescaping that C does: \ooo, \r, \n, etc |
| // Returns length of resulting string. |
| // The implementation of \x parses any positive number of hex digits, |
| // but it is an error if the value requires more than 8 bits, and the |
| // result is truncated to 8 bits. |
| // |
| // The second call stores its errors in a supplied string vector. |
| // If the string vector pointer is NULL, it reports the errors with LOG(). |
| // ---------------------------------------------------------------------- |
| |
| #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) |
| |
| inline int hex_digit_to_int(char c) { |
| /* Assume ASCII. */ |
| assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61); |
| assert(isxdigit(c)); |
| int x = static_cast<unsigned char>(c); |
| if (x > '9') { |
| x += 9; |
| } |
| return x & 0xf; |
| } |
| |
| // Protocol buffers doesn't ever care about errors, but I don't want to remove |
| // the code. |
| #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) |
| |
| int UnescapeCEscapeSequences(const char* source, char* dest) { |
| return UnescapeCEscapeSequences(source, dest, NULL); |
| } |
| |
| int UnescapeCEscapeSequences(const char* source, char* dest, |
| vector<string> *errors) { |
| GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; |
| |
| char* d = dest; |
| const char* p = source; |
| |
| // Small optimization for case where source = dest and there's no escaping |
| while ( p == d && *p != '\0' && *p != '\\' ) |
| p++, d++; |
| |
| while (*p != '\0') { |
| if (*p != '\\') { |
| *d++ = *p++; |
| } else { |
| switch ( *++p ) { // skip past the '\\' |
| case '\0': |
| LOG_STRING(ERROR, errors) << "String cannot end with \\"; |
| *d = '\0'; |
| return d - dest; // we're done with p |
| case 'a': *d++ = '\a'; break; |
| case 'b': *d++ = '\b'; break; |
| case 'f': *d++ = '\f'; break; |
| case 'n': *d++ = '\n'; break; |
| case 'r': *d++ = '\r'; break; |
| case 't': *d++ = '\t'; break; |
| case 'v': *d++ = '\v'; break; |
| case '\\': *d++ = '\\'; break; |
| case '?': *d++ = '\?'; break; // \? Who knew? |
| case '\'': *d++ = '\''; break; |
| case '"': *d++ = '\"'; break; |
| case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits |
| case '4': case '5': case '6': case '7': { |
| char ch = *p - '0'; |
| if ( IS_OCTAL_DIGIT(p[1]) ) |
| ch = ch * 8 + *++p - '0'; |
| if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice |
| ch = ch * 8 + *++p - '0'; // now points at last digit |
| *d++ = ch; |
| break; |
| } |
| case 'x': case 'X': { |
| if (!isxdigit(p[1])) { |
| if (p[1] == '\0') { |
| LOG_STRING(ERROR, errors) << "String cannot end with \\x"; |
| } else { |
| LOG_STRING(ERROR, errors) << |
| "\\x cannot be followed by non-hex digit: \\" << *p << p[1]; |
| } |
| break; |
| } |
| unsigned int ch = 0; |
| const char *hex_start = p; |
| while (isxdigit(p[1])) // arbitrarily many hex digits |
| ch = (ch << 4) + hex_digit_to_int(*++p); |
| if (ch > 0xFF) |
| LOG_STRING(ERROR, errors) << "Value of " << |
| "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; |
| *d++ = ch; |
| break; |
| } |
| #if 0 // TODO(kenton): Support \u and \U? Requires runetochar(). |
| case 'u': { |
| // \uhhhh => convert 4 hex digits to UTF-8 |
| char32 rune = 0; |
| const char *hex_start = p; |
| for (int i = 0; i < 4; ++i) { |
| if (isxdigit(p[1])) { // Look one char ahead. |
| rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. |
| } else { |
| LOG_STRING(ERROR, errors) |
| << "\\u must be followed by 4 hex digits: \\" |
| << string(hex_start, p+1-hex_start); |
| break; |
| } |
| } |
| d += runetochar(d, &rune); |
| break; |
| } |
| case 'U': { |
| // \Uhhhhhhhh => convert 8 hex digits to UTF-8 |
| char32 rune = 0; |
| const char *hex_start = p; |
| for (int i = 0; i < 8; ++i) { |
| if (isxdigit(p[1])) { // Look one char ahead. |
| // Don't change rune until we're sure this |
| // is within the Unicode limit, but do advance p. |
| char32 newrune = (rune << 4) + hex_digit_to_int(*++p); |
| if (newrune > 0x10FFFF) { |
| LOG_STRING(ERROR, errors) |
| << "Value of \\" |
| << string(hex_start, p + 1 - hex_start) |
| << " exceeds Unicode limit (0x10FFFF)"; |
| break; |
| } else { |
| rune = newrune; |
| } |
| } else { |
| LOG_STRING(ERROR, errors) |
| << "\\U must be followed by 8 hex digits: \\" |
| << string(hex_start, p+1-hex_start); |
| break; |
| } |
| } |
| d += runetochar(d, &rune); |
| break; |
| } |
| #endif |
| default: |
| LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; |
| } |
| p++; // read past letter we escaped |
| } |
| } |
| *d = '\0'; |
| return d - dest; |
| } |
| |
| // ---------------------------------------------------------------------- |
| // UnescapeCEscapeString() |
| // This does the same thing as UnescapeCEscapeSequences, but creates |
| // a new string. The caller does not need to worry about allocating |
| // a dest buffer. This should be used for non performance critical |
| // tasks such as printing debug messages. It is safe for src and dest |
| // to be the same. |
| // |
| // The second call stores its errors in a supplied string vector. |
| // If the string vector pointer is NULL, it reports the errors with LOG(). |
| // |
| // In the first and second calls, the length of dest is returned. In the |
| // the third call, the new string is returned. |
| // ---------------------------------------------------------------------- |
| int UnescapeCEscapeString(const string& src, string* dest) { |
| return UnescapeCEscapeString(src, dest, NULL); |
| } |
| |
| int UnescapeCEscapeString(const string& src, string* dest, |
| vector<string> *errors) { |
| scoped_array<char> unescaped(new char[src.size() + 1]); |
| int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors); |
| GOOGLE_CHECK(dest); |
| dest->assign(unescaped.get(), len); |
| return len; |
| } |
| |
| string UnescapeCEscapeString(const string& src) { |
| scoped_array<char> unescaped(new char[src.size() + 1]); |
| int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); |
| return string(unescaped.get(), len); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // CEscapeString() |
| // CHexEscapeString() |
| // Copies 'src' to 'dest', escaping dangerous characters using |
| // C-style escape sequences. This is very useful for preparing query |
| // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses |
| // hexadecimal rather than octal sequences. |
| // Returns the number of bytes written to 'dest' (not including the \0) |
| // or -1 if there was insufficient space. |
| // |
| // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. |
| // ---------------------------------------------------------------------- |
| int CEscapeInternal(const char* src, int src_len, char* dest, |
| int dest_len, bool use_hex, bool utf8_safe) { |
| const char* src_end = src + src_len; |
| int used = 0; |
| bool last_hex_escape = false; // true if last output char was \xNN |
| |
| for (; src < src_end; src++) { |
| if (dest_len - used < 2) // Need space for two letter escape |
| return -1; |
| |
| bool is_hex_escape = false; |
| switch (*src) { |
| case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; |
| case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; |
| case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; |
| case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; |
| case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; |
| case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; |
| default: |
| // Note that if we emit \xNN and the src character after that is a hex |
| // digit then that digit must be escaped too to prevent it being |
| // interpreted as part of the character code by C. |
| if ((!utf8_safe || static_cast<uint8>(*src) < 0x80) && |
| (!isprint(*src) || |
| (last_hex_escape && isxdigit(*src)))) { |
| if (dest_len - used < 4) // need space for 4 letter escape |
| return -1; |
| sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"), |
| static_cast<uint8>(*src)); |
| is_hex_escape = use_hex; |
| used += 4; |
| } else { |
| dest[used++] = *src; break; |
| } |
| } |
| last_hex_escape = is_hex_escape; |
| } |
| |
| if (dest_len - used < 1) // make sure that there is room for \0 |
| return -1; |
| |
| dest[used] = '\0'; // doesn't count towards return value though |
| return used; |
| } |
| |
| int CEscapeString(const char* src, int src_len, char* dest, int dest_len) { |
| return CEscapeInternal(src, src_len, dest, dest_len, false, false); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // CEscape() |
| // CHexEscape() |
| // Copies 'src' to result, escaping dangerous characters using |
| // C-style escape sequences. This is very useful for preparing query |
| // flags. 'src' and 'dest' should not overlap. The 'Hex' version |
| // hexadecimal rather than octal sequences. |
| // |
| // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. |
| // ---------------------------------------------------------------------- |
| string CEscape(const string& src) { |
| const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| scoped_array<char> dest(new char[dest_length]); |
| const int len = CEscapeInternal(src.data(), src.size(), |
| dest.get(), dest_length, false, false); |
| GOOGLE_DCHECK_GE(len, 0); |
| return string(dest.get(), len); |
| } |
| |
| namespace strings { |
| |
| string Utf8SafeCEscape(const string& src) { |
| const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| scoped_array<char> dest(new char[dest_length]); |
| const int len = CEscapeInternal(src.data(), src.size(), |
| dest.get(), dest_length, false, true); |
| GOOGLE_DCHECK_GE(len, 0); |
| return string(dest.get(), len); |
| } |
| |
| string CHexEscape(const string& src) { |
| const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| scoped_array<char> dest(new char[dest_length]); |
| const int len = CEscapeInternal(src.data(), src.size(), |
| dest.get(), dest_length, true, false); |
| GOOGLE_DCHECK_GE(len, 0); |
| return string(dest.get(), len); |
| } |
| |
| } // namespace strings |
| |
| // ---------------------------------------------------------------------- |
| // strto32_adaptor() |
| // strtou32_adaptor() |
| // Implementation of strto[u]l replacements that have identical |
| // overflow and underflow characteristics for both ILP-32 and LP-64 |
| // platforms, including errno preservation in error-free calls. |
| // ---------------------------------------------------------------------- |
| |
| int32 strto32_adaptor(const char *nptr, char **endptr, int base) { |
| const int saved_errno = errno; |
| errno = 0; |
| const long result = strtol(nptr, endptr, base); |
| if (errno == ERANGE && result == LONG_MIN) { |
| return kint32min; |
| } else if (errno == ERANGE && result == LONG_MAX) { |
| return kint32max; |
| } else if (errno == 0 && result < kint32min) { |
| errno = ERANGE; |
| return kint32min; |
| } else if (errno == 0 && result > kint32max) { |
| errno = ERANGE; |
| return kint32max; |
| } |
| if (errno == 0) |
| errno = saved_errno; |
| return static_cast<int32>(result); |
| } |
| |
| uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { |
| const int saved_errno = errno; |
| errno = 0; |
| const unsigned long result = strtoul(nptr, endptr, base); |
| if (errno == ERANGE && result == ULONG_MAX) { |
| return kuint32max; |
| } else if (errno == 0 && result > kuint32max) { |
| errno = ERANGE; |
| return kuint32max; |
| } |
| if (errno == 0) |
| errno = saved_errno; |
| return static_cast<uint32>(result); |
| } |
| |
| inline bool safe_parse_sign(string* text /*inout*/, |
| bool* negative_ptr /*output*/) { |
| const char* start = text->data(); |
| const char* end = start + text->size(); |
| |
| // Consume whitespace. |
| while (start < end && (start[0] == ' ')) { |
| ++start; |
| } |
| while (start < end && (end[-1] == ' ')) { |
| --end; |
| } |
| if (start >= end) { |
| return false; |
| } |
| |
| // Consume sign. |
| *negative_ptr = (start[0] == '-'); |
| if (*negative_ptr || start[0] == '+') { |
| ++start; |
| if (start >= end) { |
| return false; |
| } |
| } |
| *text = text->substr(start - text->data(), end - start); |
| return true; |
| } |
| |
| inline bool safe_parse_positive_int( |
| string text, int32* value_p) { |
| int base = 10; |
| int32 value = 0; |
| const int32 vmax = std::numeric_limits<int32>::max(); |
| assert(vmax > 0); |
| assert(vmax >= base); |
| const int32 vmax_over_base = vmax / base; |
| const char* start = text.data(); |
| const char* end = start + text.size(); |
| // loop over digits |
| for (; start < end; ++start) { |
| unsigned char c = static_cast<unsigned char>(start[0]); |
| int digit = c - '0'; |
| if (digit >= base || digit < 0) { |
| *value_p = value; |
| return false; |
| } |
| if (value > vmax_over_base) { |
| *value_p = vmax; |
| return false; |
| } |
| value *= base; |
| if (value > vmax - digit) { |
| *value_p = vmax; |
| return false; |
| } |
| value += digit; |
| } |
| *value_p = value; |
| return true; |
| } |
| |
| inline bool safe_parse_negative_int( |
| string text, int32* value_p) { |
| int base = 10; |
| int32 value = 0; |
| const int32 vmin = std::numeric_limits<int32>::min(); |
| assert(vmin < 0); |
| assert(vmin <= 0 - base); |
| int32 vmin_over_base = vmin / base; |
| // 2003 c++ standard [expr.mul] |
| // "... the sign of the remainder is implementation-defined." |
| // Although (vmin/base)*base + vmin%base is always vmin. |
| // 2011 c++ standard tightens the spec but we cannot rely on it. |
| if (vmin % base > 0) { |
| vmin_over_base += 1; |
| } |
| const char* start = text.data(); |
| const char* end = start + text.size(); |
| // loop over digits |
| for (; start < end; ++start) { |
| unsigned char c = static_cast<unsigned char>(start[0]); |
| int digit = c - '0'; |
| if (digit >= base || digit < 0) { |
| *value_p = value; |
| return false; |
| } |
| if (value < vmin_over_base) { |
| *value_p = vmin; |
| return false; |
| } |
| value *= base; |
| if (value < vmin + digit) { |
| *value_p = vmin; |
| return false; |
| } |
| value -= digit; |
| } |
| *value_p = value; |
| return true; |
| } |
| |
| bool safe_int(string text, int32* value_p) { |
| *value_p = 0; |
| bool negative; |
| if (!safe_parse_sign(&text, &negative)) { |
| return false; |
| } |
| if (!negative) { |
| return safe_parse_positive_int(text, value_p); |
| } else { |
| return safe_parse_negative_int(text, value_p); |
| } |
| } |
| |
| // ---------------------------------------------------------------------- |
| // FastIntToBuffer() |
| // FastInt64ToBuffer() |
| // FastHexToBuffer() |
| // FastHex64ToBuffer() |
| // FastHex32ToBuffer() |
| // ---------------------------------------------------------------------- |
| |
| // Offset into buffer where FastInt64ToBuffer places the end of string |
| // null character. Also used by FastInt64ToBufferLeft. |
| static const int kFastInt64ToBufferOffset = 21; |
| |
| char *FastInt64ToBuffer(int64 i, char* buffer) { |
| // We could collapse the positive and negative sections, but that |
| // would be slightly slower for positive numbers... |
| // 22 bytes is enough to store -2**64, -18446744073709551616. |
| char* p = buffer + kFastInt64ToBufferOffset; |
| *p-- = '\0'; |
| if (i >= 0) { |
| do { |
| *p-- = '0' + i % 10; |
| i /= 10; |
| } while (i > 0); |
| return p + 1; |
| } else { |
| // On different platforms, % and / have different behaviors for |
| // negative numbers, so we need to jump through hoops to make sure |
| // we don't divide negative numbers. |
| if (i > -10) { |
| i = -i; |
| *p-- = '0' + i; |
| *p = '-'; |
| return p; |
| } else { |
| // Make sure we aren't at MIN_INT, in which case we can't say i = -i |
| i = i + 10; |
| i = -i; |
| *p-- = '0' + i % 10; |
| // Undo what we did a moment ago |
| i = i / 10 + 1; |
| do { |
| *p-- = '0' + i % 10; |
| i /= 10; |
| } while (i > 0); |
| *p = '-'; |
| return p; |
| } |
| } |
| } |
| |
| // Offset into buffer where FastInt32ToBuffer places the end of string |
| // null character. Also used by FastInt32ToBufferLeft |
| static const int kFastInt32ToBufferOffset = 11; |
| |
| // Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the |
| // compiler to generate 32 bit arithmetic instructions. It's much faster, at |
| // least with 32 bit binaries. |
| char *FastInt32ToBuffer(int32 i, char* buffer) { |
| // We could collapse the positive and negative sections, but that |
| // would be slightly slower for positive numbers... |
| // 12 bytes is enough to store -2**32, -4294967296. |
| char* p = buffer + kFastInt32ToBufferOffset; |
| *p-- = '\0'; |
| if (i >= 0) { |
| do { |
| *p-- = '0' + i % 10; |
| i /= 10; |
| } while (i > 0); |
| return p + 1; |
| } else { |
| // On different platforms, % and / have different behaviors for |
| // negative numbers, so we need to jump through hoops to make sure |
| // we don't divide negative numbers. |
| if (i > -10) { |
| i = -i; |
| *p-- = '0' + i; |
| *p = '-'; |
| return p; |
| } else { |
| // Make sure we aren't at MIN_INT, in which case we can't say i = -i |
| i = i + 10; |
| i = -i; |
| *p-- = '0' + i % 10; |
| // Undo what we did a moment ago |
| i = i / 10 + 1; |
| do { |
| *p-- = '0' + i % 10; |
| i /= 10; |
| } while (i > 0); |
| *p = '-'; |
| return p; |
| } |
| } |
| } |
| |
| char *FastHexToBuffer(int i, char* buffer) { |
| GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i; |
| |
| static const char *hexdigits = "0123456789abcdef"; |
| char *p = buffer + 21; |
| *p-- = '\0'; |
| do { |
| *p-- = hexdigits[i & 15]; // mod by 16 |
| i >>= 4; // divide by 16 |
| } while (i > 0); |
| return p + 1; |
| } |
| |
| char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { |
| static const char *hexdigits = "0123456789abcdef"; |
| buffer[num_byte] = '\0'; |
| for (int i = num_byte - 1; i >= 0; i--) { |
| #ifdef _M_X64 |
| // MSVC x64 platform has a bug optimizing the uint32(value) in the #else |
| // block. Given that the uint32 cast was to improve performance on 32-bit |
| // platforms, we use 64-bit '&' directly. |
| buffer[i] = hexdigits[value & 0xf]; |
| #else |
| buffer[i] = hexdigits[uint32(value) & 0xf]; |
| #endif |
| value >>= 4; |
| } |
| return buffer; |
| } |
| |
| char *FastHex64ToBuffer(uint64 value, char* buffer) { |
| return InternalFastHexToBuffer(value, buffer, 16); |
| } |
| |
| char *FastHex32ToBuffer(uint32 value, char* buffer) { |
| return InternalFastHexToBuffer(value, buffer, 8); |
| } |
| |
| static inline char* PlaceNum(char* p, int num, char prev_sep) { |
| *p-- = '0' + num % 10; |
| *p-- = '0' + num / 10; |
| *p-- = prev_sep; |
| return p; |
| } |
| |
| // ---------------------------------------------------------------------- |
| // FastInt32ToBufferLeft() |
| // FastUInt32ToBufferLeft() |
| // FastInt64ToBufferLeft() |
| // FastUInt64ToBufferLeft() |
| // |
| // Like the Fast*ToBuffer() functions above, these are intended for speed. |
| // Unlike the Fast*ToBuffer() functions, however, these functions write |
| // their output to the beginning of the buffer (hence the name, as the |
| // output is left-aligned). The caller is responsible for ensuring that |
| // the buffer has enough space to hold the output. |
| // |
| // Returns a pointer to the end of the string (i.e. the null character |
| // terminating the string). |
| // ---------------------------------------------------------------------- |
| |
| static const char two_ASCII_digits[100][2] = { |
| {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, |
| {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'}, |
| {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, |
| {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'}, |
| {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, |
| {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'}, |
| {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, |
| {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'}, |
| {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, |
| {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'}, |
| {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, |
| {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'}, |
| {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, |
| {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'}, |
| {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, |
| {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'}, |
| {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, |
| {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'}, |
| {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, |
| {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'} |
| }; |
| |
| char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { |
| int digits; |
| const char *ASCII_digits = NULL; |
| // The idea of this implementation is to trim the number of divides to as few |
| // as possible by using multiplication and subtraction rather than mod (%), |
| // and by outputting two digits at a time rather than one. |
| // The huge-number case is first, in the hopes that the compiler will output |
| // that case in one branch-free block of code, and only output conditional |
| // branches into it from below. |
| if (u >= 1000000000) { // >= 1,000,000,000 |
| digits = u / 100000000; // 100,000,000 |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| sublt100_000_000: |
| u -= digits * 100000000; // 100,000,000 |
| lt100_000_000: |
| digits = u / 1000000; // 1,000,000 |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| sublt1_000_000: |
| u -= digits * 1000000; // 1,000,000 |
| lt1_000_000: |
| digits = u / 10000; // 10,000 |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| sublt10_000: |
| u -= digits * 10000; // 10,000 |
| lt10_000: |
| digits = u / 100; |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| sublt100: |
| u -= digits * 100; |
| lt100: |
| digits = u; |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| done: |
| *buffer = 0; |
| return buffer; |
| } |
| |
| if (u < 100) { |
| digits = u; |
| if (u >= 10) goto lt100; |
| *buffer++ = '0' + digits; |
| goto done; |
| } |
| if (u < 10000) { // 10,000 |
| if (u >= 1000) goto lt10_000; |
| digits = u / 100; |
| *buffer++ = '0' + digits; |
| goto sublt100; |
| } |
| if (u < 1000000) { // 1,000,000 |
| if (u >= 100000) goto lt1_000_000; |
| digits = u / 10000; // 10,000 |
| *buffer++ = '0' + digits; |
| goto sublt10_000; |
| } |
| if (u < 100000000) { // 100,000,000 |
| if (u >= 10000000) goto lt100_000_000; |
| digits = u / 1000000; // 1,000,000 |
| *buffer++ = '0' + digits; |
| goto sublt1_000_000; |
| } |
| // we already know that u < 1,000,000,000 |
| digits = u / 100000000; // 100,000,000 |
| *buffer++ = '0' + digits; |
| goto sublt100_000_000; |
| } |
| |
| char* FastInt32ToBufferLeft(int32 i, char* buffer) { |
| uint32 u = i; |
| if (i < 0) { |
| *buffer++ = '-'; |
| u = -i; |
| } |
| return FastUInt32ToBufferLeft(u, buffer); |
| } |
| |
| char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { |
| int digits; |
| const char *ASCII_digits = NULL; |
| |
| uint32 u = static_cast<uint32>(u64); |
| if (u == u64) return FastUInt32ToBufferLeft(u, buffer); |
| |
| uint64 top_11_digits = u64 / 1000000000; |
| buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); |
| u = u64 - (top_11_digits * 1000000000); |
| |
| digits = u / 10000000; // 10,000,000 |
| GOOGLE_DCHECK_LT(digits, 100); |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| u -= digits * 10000000; // 10,000,000 |
| digits = u / 100000; // 100,000 |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| u -= digits * 100000; // 100,000 |
| digits = u / 1000; // 1,000 |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| u -= digits * 1000; // 1,000 |
| digits = u / 10; |
| ASCII_digits = two_ASCII_digits[digits]; |
| buffer[0] = ASCII_digits[0]; |
| buffer[1] = ASCII_digits[1]; |
| buffer += 2; |
| u -= digits * 10; |
| digits = u; |
| *buffer++ = '0' + digits; |
| *buffer = 0; |
| return buffer; |
| } |
| |
| char* FastInt64ToBufferLeft(int64 i, char* buffer) { |
| uint64 u = i; |
| if (i < 0) { |
| *buffer++ = '-'; |
| u = -i; |
| } |
| return FastUInt64ToBufferLeft(u, buffer); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // SimpleItoa() |
| // Description: converts an integer to a string. |
| // |
| // Return value: string |
| // ---------------------------------------------------------------------- |
| |
| string SimpleItoa(int i) { |
| char buffer[kFastToBufferSize]; |
| return (sizeof(i) == 4) ? |
| FastInt32ToBuffer(i, buffer) : |
| FastInt64ToBuffer(i, buffer); |
| } |
| |
| string SimpleItoa(unsigned int i) { |
| char buffer[kFastToBufferSize]; |
| return string(buffer, (sizeof(i) == 4) ? |
| FastUInt32ToBufferLeft(i, buffer) : |
| FastUInt64ToBufferLeft(i, buffer)); |
| } |
| |
| string SimpleItoa(long i) { |
| char buffer[kFastToBufferSize]; |
| return (sizeof(i) == 4) ? |
| FastInt32ToBuffer(i, buffer) : |
| FastInt64ToBuffer(i, buffer); |
| } |
| |
| string SimpleItoa(unsigned long i) { |
| char buffer[kFastToBufferSize]; |
| return string(buffer, (sizeof(i) == 4) ? |
| FastUInt32ToBufferLeft(i, buffer) : |
| FastUInt64ToBufferLeft(i, buffer)); |
| } |
| |
| string SimpleItoa(long long i) { |
| char buffer[kFastToBufferSize]; |
| return (sizeof(i) == 4) ? |
| FastInt32ToBuffer(i, buffer) : |
| FastInt64ToBuffer(i, buffer); |
| } |
| |
| string SimpleItoa(unsigned long long i) { |
| char buffer[kFastToBufferSize]; |
| return string(buffer, (sizeof(i) == 4) ? |
| FastUInt32ToBufferLeft(i, buffer) : |
| FastUInt64ToBufferLeft(i, buffer)); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // SimpleDtoa() |
| // SimpleFtoa() |
| // DoubleToBuffer() |
| // FloatToBuffer() |
| // We want to print the value without losing precision, but we also do |
| // not want to print more digits than necessary. This turns out to be |
| // trickier than it sounds. Numbers like 0.2 cannot be represented |
| // exactly in binary. If we print 0.2 with a very large precision, |
| // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
| // On the other hand, if we set the precision too low, we lose |
| // significant digits when printing numbers that actually need them. |
| // It turns out there is no precision value that does the right thing |
| // for all numbers. |
| // |
| // Our strategy is to first try printing with a precision that is never |
| // over-precise, then parse the result with strtod() to see if it |
| // matches. If not, we print again with a precision that will always |
| // give a precise result, but may use more digits than necessary. |
| // |
| // An arguably better strategy would be to use the algorithm described |
| // in "How to Print Floating-Point Numbers Accurately" by Steele & |
| // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
| // however, that the following implementation is about as fast as |
| // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
| // will not scale well on multi-core machines. DMG's code is slightly |
| // more accurate (in that it will never use more digits than |
| // necessary), but this is probably irrelevant for most users. |
| // |
| // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
| // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
| // one in that it makes guesses and then uses strtod() to check them. |
| // Their implementation is faster because they use their own code to |
| // generate the digits in the first place rather than use snprintf(), |
| // thus avoiding format string parsing overhead. However, this makes |
| // it considerably more complicated than the following implementation, |
| // and it is embedded in a larger library. If speed turns out to be |
| // an issue, we could re-implement this in terms of their |
| // implementation. |
| // ---------------------------------------------------------------------- |
| |
| string SimpleDtoa(double value) { |
| char buffer[kDoubleToBufferSize]; |
| return DoubleToBuffer(value, buffer); |
| } |
| |
| string SimpleFtoa(float value) { |
| char buffer[kFloatToBufferSize]; |
| return FloatToBuffer(value, buffer); |
| } |
| |
| static inline bool IsValidFloatChar(char c) { |
| return ('0' <= c && c <= '9') || |
| c == 'e' || c == 'E' || |
| c == '+' || c == '-'; |
| } |
| |
| void DelocalizeRadix(char* buffer) { |
| // Fast check: if the buffer has a normal decimal point, assume no |
| // translation is needed. |
| if (strchr(buffer, '.') != NULL) return; |
| |
| // Find the first unknown character. |
| while (IsValidFloatChar(*buffer)) ++buffer; |
| |
| if (*buffer == '\0') { |
| // No radix character found. |
| return; |
| } |
| |
| // We are now pointing at the locale-specific radix character. Replace it |
| // with '.'. |
| *buffer = '.'; |
| ++buffer; |
| |
| if (!IsValidFloatChar(*buffer) && *buffer != '\0') { |
| // It appears the radix was a multi-byte character. We need to remove the |
| // extra bytes. |
| char* target = buffer; |
| do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0'); |
| memmove(target, buffer, strlen(buffer) + 1); |
| } |
| } |
| |
| char* DoubleToBuffer(double value, char* buffer) { |
| // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
| // platforms these days. Just in case some system exists where DBL_DIG |
| // is significantly larger -- and risks overflowing our buffer -- we have |
| // this assert. |
| GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
| |
| if (value == numeric_limits<double>::infinity()) { |
| strcpy(buffer, "inf"); |
| return buffer; |
| } else if (value == -numeric_limits<double>::infinity()) { |
| strcpy(buffer, "-inf"); |
| return buffer; |
| } else if (IsNaN(value)) { |
| strcpy(buffer, "nan"); |
| return buffer; |
| } |
| |
| int snprintf_result = |
| snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); |
| |
| // The snprintf should never overflow because the buffer is significantly |
| // larger than the precision we asked for. |
| GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
| |
| // We need to make parsed_value volatile in order to force the compiler to |
| // write it out to the stack. Otherwise, it may keep the value in a |
| // register, and if it does that, it may keep it as a long double instead |
| // of a double. This long double may have extra bits that make it compare |
| // unequal to "value" even though it would be exactly equal if it were |
| // truncated to a double. |
| volatile double parsed_value = strtod(buffer, NULL); |
| if (parsed_value != value) { |
| int snprintf_result = |
| snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); |
| |
| // Should never overflow; see above. |
| GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
| } |
| |
| DelocalizeRadix(buffer); |
| return buffer; |
| } |
| |
| bool safe_strtof(const char* str, float* value) { |
| char* endptr; |
| errno = 0; // errno only gets set on errors |
| #if defined(_WIN32) || defined (__hpux) // has no strtof() |
| *value = strtod(str, &endptr); |
| #else |
| *value = strtof(str, &endptr); |
| #endif |
| return *str != 0 && *endptr == 0 && errno == 0; |
| } |
| |
| char* FloatToBuffer(float value, char* buffer) { |
| // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
| // platforms these days. Just in case some system exists where FLT_DIG |
| // is significantly larger -- and risks overflowing our buffer -- we have |
| // this assert. |
| GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
| |
| if (value == numeric_limits<double>::infinity()) { |
| strcpy(buffer, "inf"); |
| return buffer; |
| } else if (value == -numeric_limits<double>::infinity()) { |
| strcpy(buffer, "-inf"); |
| return buffer; |
| } else if (IsNaN(value)) { |
| strcpy(buffer, "nan"); |
| return buffer; |
| } |
| |
| int snprintf_result = |
| snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); |
| |
| // The snprintf should never overflow because the buffer is significantly |
| // larger than the precision we asked for. |
| GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
| |
| float parsed_value; |
| if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
| int snprintf_result = |
| snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value); |
| |
| // Should never overflow; see above. |
| GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
| } |
| |
| DelocalizeRadix(buffer); |
| return buffer; |
| } |
| |
| string ToHex(uint64 num) { |
| if (num == 0) { |
| return string("0"); |
| } |
| |
| // Compute hex bytes in reverse order, writing to the back of the |
| // buffer. |
| char buf[16]; // No more than 16 hex digits needed. |
| char* bufptr = buf + 16; |
| static const char kHexChars[] = "0123456789abcdef"; |
| while (num != 0) { |
| *--bufptr = kHexChars[num & 0xf]; |
| num >>= 4; |
| } |
| |
| return string(bufptr, buf + 16 - bufptr); |
| } |
| |
| namespace strings { |
| |
| AlphaNum::AlphaNum(strings::Hex hex) { |
| char *const end = &digits[kFastToBufferSize]; |
| char *writer = end; |
| uint64 value = hex.value; |
| uint64 width = hex.spec; |
| // We accomplish minimum width by OR'ing in 0x10000 to the user's value, |
| // where 0x10000 is the smallest hex number that is as wide as the user |
| // asked for. |
| uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value; |
| static const char hexdigits[] = "0123456789abcdef"; |
| do { |
| *--writer = hexdigits[value & 0xF]; |
| value >>= 4; |
| mask >>= 4; |
| } while (mask != 0); |
| piece_data_ = writer; |
| piece_size_ = end - writer; |
| } |
| |
| } // namespace strings |
| |
| // ---------------------------------------------------------------------- |
| // StrCat() |
| // This merges the given strings or integers, with no delimiter. This |
| // is designed to be the fastest possible way to construct a string out |
| // of a mix of raw C strings, C++ strings, and integer values. |
| // ---------------------------------------------------------------------- |
| |
| // Append is merely a version of memcpy that returns the address of the byte |
| // after the area just overwritten. It comes in multiple flavors to minimize |
| // call overhead. |
| static char *Append1(char *out, const AlphaNum &x) { |
| memcpy(out, x.data(), x.size()); |
| return out + x.size(); |
| } |
| |
| static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) { |
| memcpy(out, x1.data(), x1.size()); |
| out += x1.size(); |
| |
| memcpy(out, x2.data(), x2.size()); |
| return out + x2.size(); |
| } |
| |
| static char *Append4(char *out, |
| const AlphaNum &x1, const AlphaNum &x2, |
| const AlphaNum &x3, const AlphaNum &x4) { |
| memcpy(out, x1.data(), x1.size()); |
| out += x1.size(); |
| |
| memcpy(out, x2.data(), x2.size()); |
| out += x2.size(); |
| |
| memcpy(out, x3.data(), x3.size()); |
| out += x3.size(); |
| |
| memcpy(out, x4.data(), x4.size()); |
| return out + x4.size(); |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b) { |
| string result; |
| result.resize(a.size() + b.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append2(begin, a, b); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { |
| string result; |
| result.resize(a.size() + b.size() + c.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append2(begin, a, b); |
| out = Append1(out, c); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d, const AlphaNum &e) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size() + e.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| out = Append1(out, e); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| f.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| out = Append2(out, e, f); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| const AlphaNum &g) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| f.size() + g.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| out = Append2(out, e, f); |
| out = Append1(out, g); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| const AlphaNum &g, const AlphaNum &h) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| f.size() + g.size() + h.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| out = Append4(out, e, f, g, h); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { |
| string result; |
| result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| f.size() + g.size() + h.size() + i.size()); |
| char *const begin = &*result.begin(); |
| char *out = Append4(begin, a, b, c, d); |
| out = Append4(out, e, f, g, h); |
| out = Append1(out, i); |
| GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| return result; |
| } |
| |
| // It's possible to call StrAppend with a char * pointer that is partway into |
| // the string we're appending to. However the results of this are random. |
| // Therefore, check for this in debug mode. Use unsigned math so we only have |
| // to do one comparison. |
| #define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \ |
| GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \ |
| uintptr_t((dest).size())) |
| |
| void StrAppend(string *result, const AlphaNum &a) { |
| GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| result->append(a.data(), a.size()); |
| } |
| |
| void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) { |
| GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| string::size_type old_size = result->size(); |
| result->resize(old_size + a.size() + b.size()); |
| char *const begin = &*result->begin(); |
| char *out = Append2(begin + old_size, a, b); |
| GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| } |
| |
| void StrAppend(string *result, |
| const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { |
| GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, c); |
| string::size_type old_size = result->size(); |
| result->resize(old_size + a.size() + b.size() + c.size()); |
| char *const begin = &*result->begin(); |
| char *out = Append2(begin + old_size, a, b); |
| out = Append1(out, c); |
| GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| } |
| |
| void StrAppend(string *result, |
| const AlphaNum &a, const AlphaNum &b, |
| const AlphaNum &c, const AlphaNum &d) { |
| GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, c); |
| GOOGLE_DCHECK_NO_OVERLAP(*result, d); |
| string::size_type old_size = result->size(); |
| result->resize(old_size + a.size() + b.size() + c.size() + d.size()); |
| char *const begin = &*result->begin(); |
| char *out = Append4(begin + old_size, a, b, c, d); |
| GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| } |
| |
| int GlobalReplaceSubstring(const string& substring, |
| const string& replacement, |
| string* s) { |
| GOOGLE_CHECK(s != NULL); |
| if (s->empty() || substring.empty()) |
| return 0; |
| string tmp; |
| int num_replacements = 0; |
| int pos = 0; |
| for (int match_pos = s->find(substring.data(), pos, substring.length()); |
| match_pos != string::npos; |
| pos = match_pos + substring.length(), |
| match_pos = s->find(substring.data(), pos, substring.length())) { |
| ++num_replacements; |
| // Append the original content before the match. |
| tmp.append(*s, pos, match_pos - pos); |
| // Append the replacement for the match. |
| tmp.append(replacement.begin(), replacement.end()); |
| } |
| // Append the content after the last match. If no replacements were made, the |
| // original string is left untouched. |
| if (num_replacements > 0) { |
| tmp.append(*s, pos, s->length() - pos); |
| s->swap(tmp); |
| } |
| return num_replacements; |
| } |
| |
| } // namespace protobuf |
| } // namespace google |