Blame - ucs2.diff - third_party/github/google/re2

blob: 57aec04a15cfd2fc93dd75468841076a7042fc97 [file] [log] [blame]

Russ Cox	0176cc7	2012-02-07 13:13:06 -0500	[diff] [blame]	1	This is a dump from Google's source control system of the change
				2	that removed UCS-2 support from RE2. As the explanation below
				3	says, UCS-2 mode is fundamentally at odds with things like ^ and $,
				4	so it never really worked very well. But if you are interested in using
				5	it without those operators, it did work for that. It assumed that the
				6	UCS-2 data was in the native host byte order.
				7
				8	If you are interested in adding UCS-2 mode back, this patch might
				9	be a good starting point.
				10
				11
				12	Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15
				13
				14	Retire UCS-2 mode.
				15
				16	I added it as an experiment for V8, but it
				17	requires 2-byte lookahead to do completely,
				18	and RE2 has 1-byte lookahead (enough for UTF-8)
				19	as a fairly deep fundamental assumption,
				20	so it did not support ^ or $.
				21
				22	==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ====
				23	re2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319
				24	cap_[0] = p;
				25	if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
				26	return true;
				27	- if (prog_->flags() & Regexp::UCS2)
				28	- p++;
				29	}
				30	return false;
				31	}
				32	==== re2/compile.cc#17 - re2/compile.cc#18 ====
				33	re2/compile.cc#17:95,101 - re2/compile.cc#18:95,100
				34	// Input encodings.
				35	enum Encoding {
				36	kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
				37	- kEncodingUCS2, // UCS-2 (0-FFFF), native byte order
				38	kEncodingLatin1, // Latin1 (0-FF)
				39	};
				40
				41	re2/compile.cc#17:168,176 - re2/compile.cc#18:167,172
				42	void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
				43	void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
				44	void Add_80_10ffff();
				45	- void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase);
				46	- void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
				47	- uint8 lo2, uint8 hi2, bool fold2);
				48
				49	// New suffix that matches the byte range lo-hi, then goes to next.
				50	Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next);
				51	re2/compile.cc#17:475,481 - re2/compile.cc#18:471,477
				52
				53	// Converts rune range lo-hi into a fragment that recognizes
				54	// the bytes that would make up those runes in the current
				55	- // encoding (Latin 1, UTF-8, or UCS-2).
				56	+ // encoding (Latin 1 or UTF-8).
				57	// This lets the machine work byte-by-byte even when
				58	// using multibyte encodings.
				59
				60	re2/compile.cc#17:488,496 - re2/compile.cc#18:484,489
				61	case kEncodingLatin1:
				62	AddRuneRangeLatin1(lo, hi, foldcase);
				63	break;
				64	- case kEncodingUCS2:
				65	- AddRuneRangeUCS2(lo, hi, foldcase);
				66	- break;
				67	}
				68	}
				69
				70	re2/compile.cc#17:503,581 - re2/compile.cc#18:496,501
				71	AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL));
				72	}
				73
				74	- // Test whether 16-bit values are big or little endian.
				75	- static bool BigEndian() {
				76	- union {
				77	- char byte[2];
				78	- int16 endian;
				79	- } u;
				80	-
				81	- u.byte[0] = 1;
				82	- u.byte[1] = 2;
				83	- return u.endian == 0x0102;
				84	- }
				85	-
				86	- void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
				87	- uint8 lo2, uint8 hi2, bool fold2) {
				88	- Inst* ip;
				89	- if (reversed_) {
				90	- ip = RuneByteSuffix(lo1, hi1, fold1, NULL);
				91	- ip = RuneByteSuffix(lo2, hi2, fold2, ip);
				92	- } else {
				93	- ip = RuneByteSuffix(lo2, hi2, fold2, NULL);
				94	- ip = RuneByteSuffix(lo1, hi1, fold1, ip);
				95	- }
				96	- AddSuffix(ip);
				97	- }
				98	-
				99	- void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) {
				100	- if (lo > hi || lo > 0xFFFF)
				101	- return;
				102	- if (hi > 0xFFFF)
				103	- hi = 0xFFFF;
				104	-
				105	- // We'll assemble a pattern assuming big endian.
				106	- // If the machine isn't, tell Cat to reverse its arguments.
				107	- bool oldreversed = reversed_;
				108	- if (!BigEndian()) {
				109	- reversed_ = !oldreversed;
				110	- }
				111	-
				112	- // Split into bytes.
				113	- int lo1 = lo >> 8;
				114	- int lo2 = lo & 0xFF;
				115	- int hi1 = hi >> 8;
				116	- int hi2 = hi & 0xFF;
				117	-
				118	- if (lo1 == hi1) {
				119	- // Easy case: high bits are same in both.
				120	- // Only do ASCII case folding on the second byte if the top byte is 00.
				121	- AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase);
				122	- } else {
				123	- // Harder case: different second byte ranges depending on first byte.
				124	-
				125	- // Initial fragment.
				126	- if (lo2 > 0) {
				127	- AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase);
				128	- lo1++;
				129	- }
				130	-
				131	- // Trailing fragment.
				132	- if (hi2 < 0xFF) {
				133	- AddUCS2Pair(hi1, hi1, false, 0, hi2, false);
				134	- hi1--;
				135	- }
				136	-
				137	- // Inner ranges.
				138	- if (lo1 <= hi1) {
				139	- AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false);
				140	- }
				141	- }
				142	-
				143	- // Restore reverse setting.
				144	- reversed_ = oldreversed;
				145	- }
				146	-
				147	// Table describing how to make a UTF-8 matching machine
				148	// for the rune range 80-10FFFF (Runeself-Runemax).
				149	// This range happens frequently enough (for example /./ and /[^a-z]/)
				150	re2/compile.cc#17:707,716 - re2/compile.cc#18:627,634
				151
				152	Frag Compiler::Literal(Rune r, bool foldcase) {
				153	switch (encoding_) {
				154	- default: // UCS-2 or something new
				155	- BeginRange();
				156	- AddRuneRange(r, r, foldcase);
				157	- return EndRange();
				158	+ default:
				159	+ return kNullFrag;
				160
				161	case kEncodingLatin1:
				162	return ByteRange(r, r, foldcase);
				163	re2/compile.cc#17:927,934 - re2/compile.cc#18:845,850
				164
				165	if (re->parse_flags() & Regexp::Latin1)
				166	c.encoding_ = kEncodingLatin1;
				167	- else if (re->parse_flags() & Regexp::UCS2)
				168	- c.encoding_ = kEncodingUCS2;
				169	c.reversed_ = reversed;
				170	if (max_mem <= 0) {
				171	c.max_inst_ = 100000; // more than enough
				172	re2/compile.cc#17:983,993 - re2/compile.cc#18:899,905
				173	c.prog_->set_start_unanchored(c.prog_->start());
				174	} else {
				175	Frag dot;
				176	- if (c.encoding_ == kEncodingUCS2) {
				177	- dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false));
				178	- } else {
				179	- dot = c.ByteRange(0x00, 0xFF, false);
				180	- }
				181	+ dot = c.ByteRange(0x00, 0xFF, false);
				182	Frag dotloop = c.Star(dot, true);
				183	Frag unanchored = c.Cat(dotloop, all);
				184	c.prog_->set_start_unanchored(unanchored.begin);
				185	==== re2/nfa.cc#8 - re2/nfa.cc#9 ====
				186	re2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431
				187	const char* bp = context.begin();
				188	int c = -1;
				189	int wasword = 0;
				190	- bool ucs2 = prog_->flags() & Regexp::UCS2;
				191
				192	if (text.begin() > context.begin()) {
				193	c = text.begin()[-1] & 0xFF;
				194	re2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497
				195	// If there's a required first byte for an unanchored search
				196	// and we're not in the middle of any possible matches,
				197	// use memchr to search for the byte quickly.
				198	- if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 &&
				199	+ if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
				200	p < text.end() && (p[0] & 0xFF) != first_byte_) {
				201	p = reinterpret_cast<const char*>(memchr(p, first_byte_,
				202	text.end() - p));
				203	re2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514
				204	flag = Prog::EmptyFlags(context, p);
				205	}
				206
				207	- // In UCS-2 mode, if we need to start a new thread,
				208	- // make sure to do it on an even boundary.
				209	- if(ucs2 && runq->size() == 0 &&
				210	- (p - context.begin()) % 2 && p < text.end()) {
				211	- p++;
				212	- flag = Prog::EmptyFlags(context, p);
				213	- }
				214	-
				215	// Steal match storage (cleared but unused as of yet)
				216	// temporarily to hold match boundaries for new thread.
				217	- // In UCS-2 mode, only start the thread on a 2-byte boundary.
				218	- if(!ucs2 || (p - context.begin()) % 2 == 0) {
				219	- match_[0] = p;
				220	- AddToThreadq(runq, start_, flag, p, match_);
				221	- match_[0] = NULL;
				222	- }
				223	+ match_[0] = p;
				224	+ AddToThreadq(runq, start_, flag, p, match_);
				225	+ match_[0] = NULL;
				226	}
				227
				228	// If all the threads have died, stop early.
				229	==== re2/parse.cc#22 - re2/parse.cc#23 ====
				230	re2/parse.cc#22:160,167 - re2/parse.cc#23:160,165
				231	status_(status), stacktop_(NULL), ncap_(0) {
				232	if (flags_ & Latin1)
				233	rune_max_ = 0xFF;
				234	- else if (flags & UCS2)
				235	- rune_max_ = 0xFFFF;
				236	else
				237	rune_max_ = Runemax;
				238	}
				239	re2/parse.cc#22:365,387 - re2/parse.cc#23:363,374
				240	bool Regexp::ParseState::PushCarat() {
				241	if (flags_ & OneLine) {
				242	return PushSimpleOp(kRegexpBeginText);
				243	- } else {
				244	- if (flags_ & UCS2) {
				245	- status_->set_code(kRegexpUnsupported);
				246	- status_->set_error_arg("multiline ^ in UCS-2 mode");
				247	- return false;
				248	- }
				249	- return PushSimpleOp(kRegexpBeginLine);
				250	}
				251	+ return PushSimpleOp(kRegexpBeginLine);
				252	}
				253
				254	// Pushes a \b or \B onto the stack.
				255	bool Regexp::ParseState::PushWordBoundary(bool word) {
				256	- if (flags_ & UCS2) {
				257	- status_->set_code(kRegexpUnsupported);
				258	- status_->set_error_arg("\\b or \\B in UCS-2 mode");
				259	- return false;
				260	- }
				261	if (word)
				262	return PushSimpleOp(kRegexpWordBoundary);
				263	return PushSimpleOp(kRegexpNoWordBoundary);
				264	re2/parse.cc#22:397,407 - re2/parse.cc#23:384,389
				265	bool ret = PushSimpleOp(kRegexpEndText);
				266	flags_ = oflags;
				267	return ret;
				268	- }
				269	- if (flags_ & UCS2) {
				270	- status_->set_code(kRegexpUnsupported);
				271	- status_->set_error_arg("multiline $ in UCS-2 mode");
				272	- return false;
				273	}
				274	return PushSimpleOp(kRegexpEndLine);
				275	}
				276	==== re2/re2.cc#34 - re2/re2.cc#35 ====
				277	re2/re2.cc#34:79,86 - re2/re2.cc#35:79,84
				278	return RE2::ErrorBadUTF8;
				279	case re2::kRegexpBadNamedCapture:
				280	return RE2::ErrorBadNamedCapture;
				281	- case re2::kRegexpUnsupported:
				282	- return RE2::ErrorUnsupported;
				283	}
				284	return RE2::ErrorInternal;
				285	}
				286	re2/re2.cc#34:122,130 - re2/re2.cc#35:120,125
				287	break;
				288	case RE2::Options::EncodingLatin1:
				289	flags |= Regexp::Latin1;
				290	- break;
				291	- case RE2::Options::EncodingUCS2:
				292	- flags |= Regexp::UCS2;
				293	break;
				294	}
				295
				296	==== re2/re2.h#36 - re2/re2.h#37 ====
				297	re2/re2.h#36:246,252 - re2/re2.h#37:246,251
				298	ErrorBadUTF8, // invalid UTF-8 in regexp
				299	ErrorBadNamedCapture, // bad named capture group
				300	ErrorPatternTooLarge, // pattern too large (compile failed)
				301	- ErrorUnsupported, // unsupported feature (in UCS-2 mode)
				302	};
				303
				304	// Predefined common options.
				305	re2/re2.h#36:570,576 - re2/re2.h#37:569,574
				306
				307	enum Encoding {
				308	EncodingUTF8 = 1,
				309	- EncodingUCS2, // 16-bit Unicode 0-FFFF only
				310	EncodingLatin1
				311	};
				312
				313	==== re2/regexp.cc#15 - re2/regexp.cc#16 ====
				314	re2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329
				315	// the regexp that remains after the prefix. The prefix might
				316	// be ASCII case-insensitive.
				317	bool Regexp::RequiredPrefix(string prefix, bool foldcase, Regexp** suffix) {
				318	- // Don't even bother for UCS-2; it's time to throw that code away.
				319	- if (parse_flags_ & UCS2)
				320	- return false;
				321	-
				322	// No need for a walker: the regexp must be of the form
				323	// 1. some number of ^ anchors
				324	// 2. a literal char or string
				325	==== re2/regexp.h#20 - re2/regexp.h#21 ====
				326	re2/regexp.h#20:187,193 - re2/regexp.h#21:187,192
				327	kRegexpBadPerlOp, // bad perl operator
				328	kRegexpBadUTF8, // invalid UTF-8 in regexp
				329	kRegexpBadNamedCapture, // bad named capture
				330	- kRegexpUnsupported, // unsupported operator
				331	};
				332
				333	// Error status for certain operations.
				334	re2/regexp.h#20:307,316 - re2/regexp.h#21:306,314
				335	// \Q and \E to disable/enable metacharacters
				336	// (?P<name>expr) for named captures
				337	// \C to match any single byte
				338	- UCS2 = 1<<10, // Text is in UCS-2, regexp is in UTF-8.
				339	- UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group
				340	+ UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
				341	// and \P{Han} for its negation.
				342	- NeverNL = 1<<12, // Never match NL, even if the regexp mentions
				343	+ NeverNL = 1<<11, // Never match NL, even if the regexp mentions
				344	// it explicitly.
				345
				346	// As close to Perl as we can get.
				347	==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ====
				348	re2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139
				349	cap_[0] = p;
				350	if (Visit(prog_->start(), p)) // Match must be leftmost; done.
				351	return true;
				352	- if (prog_->flags() & Regexp::UCS2)
				353	- p++;
				354	}
				355	return false;
				356	}
				357	==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ====
				358	re2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152
				359	static ParseMode parse_modes[] = {
				360	{ single_line, "single-line" },
				361	{ single_line|Regexp::Latin1, "single-line, latin1" },
				362	- { single_line|Regexp::UCS2, "single-line, ucs2" },
				363	{ multi_line, "multiline" },
				364	{ multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
				365	{ multi_line|Regexp::Latin1, "multiline, latin1" },
				366	- { multi_line|Regexp::UCS2, "multiline, ucs2" },
				367	};
				368
				369	static string FormatMode(Regexp::ParseFlags flags) {
				370	re2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185
				371	RegexpStatus status;
				372	regexp_ = Regexp::Parse(regexp_str, flags, &status);
				373	if (regexp_ == NULL) {
				374	- if (status.code() != kRegexpUnsupported) {
				375	- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
				376	- << " mode: " << FormatMode(flags);
				377	- error_ = true;
				378	- }
				379	+ LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
				380	+ << " mode: " << FormatMode(flags);
				381	+ error_ = true;
				382	return;
				383	}
				384	prog_ = regexp_->CompileToProg(0);
				385	re2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231
				386	RE2::Options options;
				387	if (flags & Regexp::Latin1)
				388	options.set_encoding(RE2::Options::EncodingLatin1);
				389	- else if (flags & Regexp::UCS2)
				390	- options.set_encoding(RE2::Options::EncodingUCS2);
				391	if (kind_ == Prog::kLongestMatch)
				392	options.set_longest_match(true);
				393	re2_ = new RE2(re, options);
				394	re2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280
				395	delete re2_;
				396	}
				397
				398	- // Converts UTF-8 string in text into UCS-2 string in new_text.
				399	- static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) {
				400	- const char* p = text.begin();
				401	- const char* ep = text.end();
				402	- uint16* q = new uint16[ep - p];
				403	- uint16* q0 = q;
				404	-
				405	- int n;
				406	- Rune r;
				407	- for (; p < ep; p += n) {
				408	- if (!fullrune(p, ep - p)) {
				409	- delete[] q0;
				410	- return false;
				411	- }
				412	- n = chartorune(&r, p);
				413	- if (r > 0xFFFF) {
				414	- delete[] q0;
				415	- return false;
				416	- }
				417	- *q++ = r;
				418	- }
				419	- *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0));
				420	- return true;
				421	- }
				422	-
				423	- // Rewrites *sp from being a pointer into text8 (UTF-8)
				424	- // to being a pointer into text16 (equivalent text but in UCS-2).
				425	- static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16,
				426	- StringPiece *sp) {
				427	- if (sp->begin() == NULL && text8.begin() != NULL)
				428	- return;
				429	-
				430	- int nrune = 0;
				431	- int n;
				432	- Rune r;
				433	- const char* p = text8.begin();
				434	- const char* ep = text8.end();
				435	- const char* spbegin = NULL;
				436	- const char* spend = NULL;
				437	- for (;;) {
				438	- if (p == sp->begin())
				439	- spbegin = text16.begin() + sizeof(uint16)*nrune;
				440	- if (p == sp->end())
				441	- spend = text16.begin() + sizeof(uint16)*nrune;
				442	- if (p >= ep)
				443	- break;
				444	- n = chartorune(&r, p);
				445	- p += n;
				446	- nrune++;
				447	- }
				448	- if (spbegin == NULL || spend == NULL) {
				449	- LOG(FATAL) << "Error in AdjustUTF8ToUCS2 "
				450	- << CEscape(text8) << " "
				451	- << (int)(sp->begin() - text8.begin()) << " "
				452	- << (int)(sp->end() - text8.begin());
				453	- }
				454	- *sp = StringPiece(spbegin, spend - spbegin);
				455	- }
				456	-
				457	- // Rewrites *sp from begin a pointer into text16 (UCS-2)
				458	- // to being a pointer into text8 (equivalent text but in UTF-8).
				459	- static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8,
				460	- StringPiece* sp) {
				461	- if (sp->begin() == NULL)
				462	- return;
				463	-
				464	- int nrune = 0;
				465	- int n;
				466	- Rune r;
				467	- const char* p = text8.begin();
				468	- const char* ep = text8.end();
				469	- const char* spbegin = NULL;
				470	- const char* spend = NULL;
				471	- for (;;) {
				472	- if (nrune == (sp->begin() - text16.begin())/2)
				473	- spbegin = p;
				474	- if (nrune == (sp->end() - text16.begin())/2)
				475	- spend = p;
				476	- if (p >= ep)
				477	- break;
				478	- n = chartorune(&r, p);
				479	- p += n;
				480	- nrune++;
				481	- }
				482	- if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) {
				483	- LOG(FATAL) << "Error in AdjustUCS2ToUTF8 "
				484	- << CEscape(text16) << " "
				485	- << (int)(sp->begin() - text16.begin()) << " "
				486	- << (int)(sp->end() - text16.begin());
				487	- }
				488	- *sp = StringPiece(spbegin, spend - spbegin);
				489	- }
				490	-
				491	// Runs a single search using the named engine type.
				492	// This interface hides all the irregularities of the various
				493	// engine interfaces from the rest of this file.
				494	re2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300
				495
				496	StringPiece text = orig_text;
				497	StringPiece context = orig_context;
				498	- bool ucs2 = false;
				499
				500	- if ((flags() & Regexp::UCS2) && type != kEnginePCRE) {
				501	- if (!ConvertUTF8ToUCS2(orig_context, &context)) {
				502	- result->skipped = true;
				503	- return;
				504	- }
				505	-
				506	- // Rewrite context to refer to new text.
				507	- AdjustUTF8ToUCS2(orig_context, context, &text);
				508	- ucs2 = true;
				509	- }
				510	-
				511	switch (type) {
				512	default:
				513	LOG(FATAL) << "Bad RunSearch type: " << (int)type;
				514	re2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451
				515	}
				516	}
				517
				518	- // If we did UCS-2 matching, rewrite the matches to refer
				519	- // to the original UTF-8 text.
				520	- if (ucs2) {
				521	- if (result->matched) {
				522	- if (result->have_submatch0) {
				523	- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]);
				524	- } else if (result->have_submatch) {
				525	- for (int i = 0; i < nsubmatch; i++) {
				526	- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]);
				527	- }
				528	- }
				529	- }
				530	- delete[] context.begin();
				531	- }
				532	-
				533	if (!result->matched)
				534	memset(result->submatch, 0, sizeof result->submatch);
				535	}
				536	re2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475
				537	return true;
				538	}
				539
				540	- // Check whether text uses only Unicode points <= 0xFFFF
				541	- // (in the BMP).
				542	- static bool IsBMP(const StringPiece& text) {
				543	- const char* p = text.begin();
				544	- const char* ep = text.end();
				545	- while (p < ep) {
				546	- if (!fullrune(p, ep - p))
				547	- return false;
				548	- Rune r;
				549	- p += chartorune(&r, p);
				550	- if (r > 0xFFFF)
				551	- return false;
				552	- }
				553	- return true;
				554	- }
				555	-
				556	// Runs a single test.
				557	bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
				558	Prog::Anchor anchor) {
				559	re2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483
				560	Result correct;
				561	RunSearch(kEngineBacktrack, text, context, anchor, &correct);
				562	if (correct.skipped) {
				563	- if (regexp_ == NULL || !IsBMP(context)) // okay to skip in UCS-2 mode
				564	+ if (regexp_ == NULL)
				565	return true;
				566	LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
				567	<< " " << FormatMode(flags_);