Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 1 | // Copyright 2018 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
nik7273 | 38b7043 | 2019-03-08 10:27:53 -0500 | [diff] [blame] | 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/strings/str_replace.h" |
| 16 | |
| 17 | #include <cstring> |
| 18 | #include <string> |
| 19 | |
| 20 | #include "benchmark/benchmark.h" |
| 21 | #include "absl/base/internal/raw_logging.h" |
| 22 | |
| 23 | namespace { |
| 24 | |
// Shared benchmark fixtures, built on first use by SetUpStrings().
// They start out null; SetUpStrings() uses `big_string == nullptr` as its
// "not yet initialized" test. Nothing in this file deletes them.

// The input text that every benchmark runs its replacements over.
std::string* big_string = nullptr;
// Expected result of replacing every "the" in *big_string with "box".
std::string* after_replacing_the = nullptr;
// Expected result of applying every entry of `replacements` to *big_string.
std::string* after_replacing_many = nullptr;
| 28 | |
// One substitution rule: every occurrence of `needle` is to be rewritten as
// `replacement`.
struct Replacement {
  const char* needle;
  const char* replacement;
};

// The substitutions used to compute the expected output of the multi-pattern
// benchmark. The table is only ever read, so it is a compile-time constant.
// The trailing `//` markers keep one entry per line under clang-format.
constexpr Replacement replacements[] = {
    {"the", "box"},          //
    {"brown", "quick"},      //
    {"jumped", "liquored"},  //
    {"dozen", "brown"},      //
    {"lazy", "pack"},        //
    {"liquor", "shakes"},    //
};
| 40 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 41 | // Here, we set up a string for use in global-replace benchmarks. |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 42 | // We started with a million blanks, and then deterministically insert |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 43 | // 10,000 copies each of two pangrams. The result is a string that is |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 44 | // 40% blank space and 60% these words. 'the' occurs 18,247 times and |
| 45 | // all the substitutions together occur 49,004 times. |
| 46 | // |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 47 | // We then create "after_replacing_the" to be a string that is a result of |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 48 | // replacing "the" with "box" in big_string. |
| 49 | // |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 50 | // And then we create "after_replacing_many" to be a string that is result |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 51 | // of preferring several substitutions. |
| 52 | void SetUpStrings() { |
| 53 | if (big_string == nullptr) { |
| 54 | size_t r = 0; |
| 55 | big_string = new std::string(1000 * 1000, ' '); |
| 56 | for (std::string phrase : {"the quick brown fox jumped over the lazy dogs", |
Abseil Team | febc5ee | 2019-03-06 11:36:55 -0800 | [diff] [blame] | 57 | "pack my box with the five dozen liquor jugs"}) { |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 58 | for (int i = 0; i < 10 * 1000; ++i) { |
| 59 | r = r * 237 + 41; // not very random. |
| 60 | memcpy(&(*big_string)[r % (big_string->size() - phrase.size())], |
| 61 | phrase.data(), phrase.size()); |
| 62 | } |
| 63 | } |
| 64 | // big_string->resize(50); |
Abseil Team | a877af1 | 2020-03-10 09:28:06 -0700 | [diff] [blame] | 65 | // OK, we've set up the string, now let's set up expectations - first by |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 66 | // just replacing "the" with "box" |
| 67 | after_replacing_the = new std::string(*big_string); |
| 68 | for (size_t pos = 0; |
| 69 | (pos = after_replacing_the->find("the", pos)) != std::string::npos;) { |
| 70 | memcpy(&(*after_replacing_the)[pos], "box", 3); |
| 71 | } |
| 72 | // And then with all the replacements. |
| 73 | after_replacing_many = new std::string(*big_string); |
| 74 | for (size_t pos = 0;;) { |
| 75 | size_t next_pos = static_cast<size_t>(-1); |
| 76 | const char* needle_string = nullptr; |
| 77 | const char* replacement_string = nullptr; |
| 78 | for (const auto& r : replacements) { |
| 79 | auto needlepos = after_replacing_many->find(r.needle, pos); |
| 80 | if (needlepos != std::string::npos && needlepos < next_pos) { |
| 81 | next_pos = needlepos; |
| 82 | needle_string = r.needle; |
| 83 | replacement_string = r.replacement; |
| 84 | } |
| 85 | } |
| 86 | if (next_pos > after_replacing_many->size()) break; |
| 87 | after_replacing_many->replace(next_pos, strlen(needle_string), |
| 88 | replacement_string); |
| 89 | next_pos += strlen(replacement_string); |
| 90 | pos = next_pos; |
| 91 | } |
| 92 | } |
| 93 | } |
| 94 | |
| 95 | void BM_StrReplaceAllOneReplacement(benchmark::State& state) { |
| 96 | SetUpStrings(); |
| 97 | std::string src = *big_string; |
| 98 | for (auto _ : state) { |
| 99 | std::string dest = absl::StrReplaceAll(src, {{"the", "box"}}); |
| 100 | ABSL_RAW_CHECK(dest == *after_replacing_the, |
| 101 | "not benchmarking intended behavior"); |
| 102 | } |
| 103 | } |
| 104 | BENCHMARK(BM_StrReplaceAllOneReplacement); |
| 105 | |
| 106 | void BM_StrReplaceAll(benchmark::State& state) { |
| 107 | SetUpStrings(); |
| 108 | std::string src = *big_string; |
| 109 | for (auto _ : state) { |
| 110 | std::string dest = absl::StrReplaceAll(src, {{"the", "box"}, |
Abseil Team | febc5ee | 2019-03-06 11:36:55 -0800 | [diff] [blame] | 111 | {"brown", "quick"}, |
| 112 | {"jumped", "liquored"}, |
| 113 | {"dozen", "brown"}, |
| 114 | {"lazy", "pack"}, |
| 115 | {"liquor", "shakes"}}); |
Abseil Team | 59ae4d5 | 2018-05-18 08:24:54 -0700 | [diff] [blame] | 116 | ABSL_RAW_CHECK(dest == *after_replacing_many, |
| 117 | "not benchmarking intended behavior"); |
| 118 | } |
| 119 | } |
| 120 | BENCHMARK(BM_StrReplaceAll); |
| 121 | |
| 122 | } // namespace |