[K/N][stdlib] Support \V, \v, \H, \h, \R in regex engine
Issue #KT-50742 Fixed
diff --git a/libraries/stdlib/native-wasm/src/kotlin/text/regex/AbstractCharClass.kt b/libraries/stdlib/native-wasm/src/kotlin/text/regex/AbstractCharClass.kt
index 090a5e4..f6dbb42 100644
--- a/libraries/stdlib/native-wasm/src/kotlin/text/regex/AbstractCharClass.kt
+++ b/libraries/stdlib/native-wasm/src/kotlin/text/regex/AbstractCharClass.kt
@@ -300,6 +300,46 @@
override fun computeValue(): AbstractCharClass = CharClass().add('0', '9').add('a', 'f').add('A', 'F')
}
+ // Predefined class for the `\v` escape.
+ // From Java 8+ `Pattern` doc: \v - A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029]
+ internal class CachedVerticalWhitespace : CachedCharClass() {
+     init {
+         initValues()
+     }
+
+     override fun computeValue(): AbstractCharClass {
+         // '\u000C' is the form feed char (aka \f).
+         val verticalWhitespaces = listOf('\n', '\u000B', '\u000C', '\r', '\u0085', '\u2028', '\u2029')
+         return CharClass().addAll(verticalWhitespaces)
+     }
+ }
+
+ // Predefined class for the `\V` escape.
+ // From Java 8+ `Pattern` doc: \V - A non-vertical whitespace character: [^\v]
+ internal class CachedNonVerticalWhitespace : CachedCharClass() {
+     init {
+         initValues()
+     }
+
+     override fun computeValue(): AbstractCharClass {
+         // Take the negated view of the `\v` class.
+         val nonVerticalWhitespace = CachedVerticalWhitespace().getValue(negative = true)
+         // TODO: Does it match a pair of surrogates? Do we really need mayContainSupplCodepoints?
+         nonVerticalWhitespace.mayContainSupplCodepoints = true
+         return nonVerticalWhitespace
+     }
+ }
+
+ // Predefined class for the `\h` escape.
+ // From Java 8+ `Pattern` doc:
+ // \h - A horizontal whitespace character: [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]
+ internal class CachedHorizontalWhitespace : CachedCharClass() {
+     init {
+         initValues()
+     }
+
+     override fun computeValue(): AbstractCharClass {
+         // Standalone chars first, then the contiguous \u2000-\u200a range.
+         val singleChars = listOf(' ', '\t', '\u00A0', '\u1680', '\u180e', '\u202f', '\u205f', '\u3000')
+         return CharClass()
+             .addAll(singleChars)
+             .add('\u2000', '\u200a')
+     }
+ }
+
+ // Predefined class for the `\H` escape.
+ // From Java 8+ `Pattern` doc:
+ // \H - A non-horizontal whitespace character: [^\h]
+ internal class CachedNonHorizontalWhitespace : CachedCharClass() {
+     init {
+         initValues()
+     }
+
+     override fun computeValue(): AbstractCharClass {
+         // Take the negated view of the `\h` class.
+         val nonHorizontalWhitespace = CachedHorizontalWhitespace().getValue(negative = true)
+         nonHorizontalWhitespace.mayContainSupplCodepoints = true
+         return nonHorizontalWhitespace
+     }
+ }
+
internal class CachedRange(var start: Int, var end: Int) : CachedCharClass() {
init {
initValues()
@@ -392,6 +432,10 @@
NON_SPACE("S", ::CachedNonSpace),
DIGIT_SHORT("d", ::CachedDigit),
NON_DIGIT("D", ::CachedNonDigit),
+ VERTICAL_WHITESPACE("v", ::CachedVerticalWhitespace),
+ NON_VERTICAL_WHITESPACE("V", ::CachedNonVerticalWhitespace),
+ HORIZONTAL_WHITESPACE("h", ::CachedHorizontalWhitespace),
+ NON_HORIZONTAL_WHITESPACE("H", ::CachedNonHorizontalWhitespace),
BASIC_LATIN("BasicLatin", { CachedRange(0x0000, 0x007F) }),
LATIN1_SUPPLEMENT("Latin-1Supplement", { CachedRange(0x0080, 0x00FF) }),
LATIN_EXTENDED_A("LatinExtended-A", { CachedRange(0x0100, 0x017F) }),
diff --git a/libraries/stdlib/native-wasm/src/kotlin/text/regex/CharClass.kt b/libraries/stdlib/native-wasm/src/kotlin/text/regex/CharClass.kt
index 9333d94..5a65d85 100644
--- a/libraries/stdlib/native-wasm/src/kotlin/text/regex/CharClass.kt
+++ b/libraries/stdlib/native-wasm/src/kotlin/text/regex/CharClass.kt
@@ -244,6 +244,16 @@
fun add(start: Char, end: Char): CharClass = add(start.toInt(), end.toInt())
+ // Calls [add] for every element of [chars] and returns this instance, so calls can be chained.
+ fun addAll(chars: Iterable<Char>): CharClass = apply {
+     for (char in chars) add(char)
+ }
+
+ // Calls [add] for every code point of [chars] and returns this instance, so calls can be chained.
+ fun addAll(chars: Iterable<Int>): CharClass = apply {
+     for (codePoint in chars) add(codePoint)
+ }
+
// OR operation
fun union(another: AbstractCharClass) {
if (!mayContainSupplCodepoints && another.mayContainSupplCodepoints) {
diff --git a/libraries/stdlib/native-wasm/src/kotlin/text/regex/Lexer.kt b/libraries/stdlib/native-wasm/src/kotlin/text/regex/Lexer.kt
index eb2465a..a0055e1 100644
--- a/libraries/stdlib/native-wasm/src/kotlin/text/regex/Lexer.kt
+++ b/libraries/stdlib/native-wasm/src/kotlin/text/regex/Lexer.kt
@@ -435,7 +435,7 @@
}
// Word/whitespace/digit.
- 'w', 's', 'd', 'W', 'S', 'D' -> {
+ 'w', 's', 'd', 'W', 'S', 'D', 'v', 'V', 'h', 'H' -> {
lookAheadSpecialToken = AbstractCharClass.getPredefinedClass(
pattern.concatToString(prevNonWhitespaceIndex, prevNonWhitespaceIndex + 1),
false
@@ -477,6 +477,7 @@
'G' -> lookAhead = CHAR_PREVIOUS_MATCH
'Z' -> lookAhead = CHAR_END_OF_LINE
'z' -> lookAhead = CHAR_END_OF_INPUT
+ 'R' -> lookAhead = CHAR_LINEBREAK
// \cx - A control character corresponding to x.
'c' -> {
@@ -488,7 +489,7 @@
}
}
- 'C', 'E', 'F', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'R', 'T', 'U', 'V', 'X', 'Y', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'q', 'y' ->
+ 'C', 'E', 'F', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'T', 'U', 'X', 'Y', 'g', 'i', 'j', 'k', 'l', 'm', 'o', 'q', 'y' ->
throw PatternSyntaxException("Illegal escape sequence", patternString, curTokenIndex)
}
return false
@@ -704,6 +705,7 @@
val CHAR_PREVIOUS_MATCH = 0x80000000.toInt() or 'G'.toInt()
val CHAR_END_OF_INPUT = 0x80000000.toInt() or 'z'.toInt()
val CHAR_END_OF_LINE = 0x80000000.toInt() or 'Z'.toInt()
+ val CHAR_LINEBREAK = 0x80000000.toInt() or 'R'.toInt()
// Quantifier modes.
val QMOD_GREEDY = 0xe0000000.toInt()
diff --git a/libraries/stdlib/native-wasm/src/kotlin/text/regex/Pattern.kt b/libraries/stdlib/native-wasm/src/kotlin/text/regex/Pattern.kt
index 9eaad27..01bdf00 100644
--- a/libraries/stdlib/native-wasm/src/kotlin/text/regex/Pattern.kt
+++ b/libraries/stdlib/native-wasm/src/kotlin/text/regex/Pattern.kt
@@ -533,11 +533,27 @@
term = EOLSet(consumersCount++, AbstractLineTerminator.getInstance(flags))
}
- Lexer.CHAR_START_OF_INPUT -> { // Start if an input: \A
+ Lexer.CHAR_START_OF_INPUT -> { // Start of an input: \A
lexemes.next()
term = SOLSet(AbstractLineTerminator.getInstance(flags))
}
+ Lexer.CHAR_LINEBREAK -> {
+ // Any unicode linebreak sequence:
+ // \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029]
+ lexemes.next()
+ val fSet = NonCapFSet(consumersCount++)
+ val lineBreakSequence = SequenceSet("\u000D\u000A").apply {
+ next = fSet
+ }
+ val lineBreakChars = RangeSet(
+ CharClass().addAll(listOf('\u000A', '\u000B', '\u000C', '\u000D', '\u0085', '\u2028', '\u2029'))
+ ).apply {
+ next = fSet
+ }
+ term = NonCapturingJointSet(listOf(lineBreakSequence, lineBreakChars), fSet)
+ }
+
Lexer.CHAR_PREVIOUS_MATCH -> { // A previous match: \G
lexemes.next()
term = PreviousMatchSet()
diff --git a/libraries/stdlib/native-wasm/test/harmony_regex/PatternTest.kt b/libraries/stdlib/native-wasm/test/harmony_regex/PatternTest.kt
index ef8cccb..d7aea8c 100644
--- a/libraries/stdlib/native-wasm/test/harmony_regex/PatternTest.kt
+++ b/libraries/stdlib/native-wasm/test/harmony_regex/PatternTest.kt
@@ -25,6 +25,37 @@
fun assertTrue(msg: String, value: Boolean) = assertTrue(value, msg)
fun assertFalse(msg: String, value: Boolean) = assertFalse(value, msg)
+ /**
+  * Renders this string for assertion messages: letters and digits are kept as is, `\n`, `\r` and `\t` use their
+  * short escapes, and every other char is written as `\u` followed by its hex code padded to four digits.
+  */
+ private fun String.asEscapeSeq() = buildString {
+     this@asEscapeSeq.forEach {
+         when {
+             it.isLetterOrDigit() -> append(it)
+             it == '\n' -> append("\\n")
+             it == '\r' -> append("\\r")
+             it == '\t' -> append("\\t")
+             else -> {
+                 // Pad with zeros: without it U+000B would be printed as `\ub`, which is not a valid escape.
+                 val hexCode = it.code.toString(16).padStart(4, '0')
+                 append("\\u$hexCode")
+             }
+         }
+     }
+ }
+
+ /** Asserts that [regex] matches the entire [string]; the failure message shows the string in escaped form. */
+ private fun assertMatch(regex: Regex, string: String) {
+     val isFullMatch = regex.matches(string)
+     assertTrue(isFullMatch, "Regex `$regex` expected to match string `${string.asEscapeSeq()}`")
+ }
+
+ /** Asserts that [regex] does not match the entire [string]; the failure message shows the string in escaped form. */
+ private fun assertNoMatch(regex: Regex, string: String) {
+     val isFullMatch = regex.matches(string)
+     assertFalse(isFullMatch, "Regex `$regex` expected to not match string `${string.asEscapeSeq()}`")
+ }
+
+ /** Asserts that the first match of [regex] found in [string] spans exactly [expectedRange]. */
+ private fun assertFind(regex: Regex, string: String, expectedRange: IntRange) {
+     val actualRange = regex.find(string)?.range
+     val failureMessage = "Wrong `find` result for regex `$regex` in string `${string.asEscapeSeq()}`"
+     assertEquals(expectedRange, actualRange, failureMessage)
+ }
+
internal var testPatterns = arrayOf("(a|b)*abb", "(1*2*3*4*)*567", "(a|b|c|d)*aab", "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", "(a|b)*(a|b)*A(a|b)*lice.*", "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do", "x(?c)y", "x(?cc)y", "x(?:c)y")
@Test fun testCommentsInPattern() {
@@ -1260,4 +1291,323 @@
assertNull(result)
}
+
+ /**
+  * Checks `\v` (vertical whitespace) and `\V` (its negation): whole-string matching, `find` ranges, combinations
+  * with neighboring constructs and quantifiers, back references, alternation and boundary matchers.
+  */
+ @Test fun testVerticalWhitespaceChar() {
+     // From Java 8+ `Pattern` doc:
+     // \v - A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029]
+     // \V - A non-vertical whitespace character: [^\v]
+
+     val positiveRegex = Regex("\\v")
+     val negativeRegex = Regex("\\V")
+     val verticalWhitespaces = listOf("\n", "\u000B", "\u000C" /* aka \f */, "\r", "\u0085", "\u2028", "\u2029")
+     val nonVerticalWhitespaces = listOf("1", "K", " ", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}")
+
+     // Smoke tests
+     verticalWhitespaces.forEach {
+         assertMatch(positiveRegex, it)
+         assertNoMatch(negativeRegex, it)
+
+         assertFind(positiveRegex, "prefix$it", 6..6)
+         assertFind(positiveRegex, "prefix${it}suffix", 6..6)
+         assertFind(positiveRegex, "${it}suffix", 0..0)
+     }
+
+     nonVerticalWhitespaces.forEach {
+         assertNoMatch(positiveRegex, it)
+         assertMatch(negativeRegex, it)
+
+         // The expected range covers the whole element, so a surrogate pair must be found as one match.
+         assertFind(negativeRegex, "\n\n$it", 2 until (2 + it.length))
+         assertFind(negativeRegex, "\n\n${it}\n\n", 2 until (2 + it.length))
+         assertFind(negativeRegex, "${it}\n\n", 0 until it.length)
+     }
+
+     // Test that \v and \V can be a part of a more complex regex.
+     val neighbors = listOf(
+         // regex expression to its match.
+         "x" to "x",
+         " " to " ",
+         "\\n" to "\n",
+         "\\\\" to "\\",
+         "[abc]" to "a",
+         "." to "x",
+         "\\d" to "5",
+         "\\D" to "x",
+         "\\s" to " ",
+         "\\S" to "x",
+         "\\w" to "x",
+         "\\W" to "|",
+         "\\p{Alnum}" to "x",
+         "\\p{Space}" to " ",
+         "\\p{Blank}" to " ",
+         "\\p{Sc}" to "$"
+     ).flatMap { (expression, match) ->
+         // Every neighbor is also checked with the `+` and `{2,4}` quantifiers applied to it.
+         listOf(
+             expression to match,
+             "$expression+" to match.repeat(2),
+             "$expression{2,4}" to match.repeat(3)
+         )
+     }
+
+     neighbors.forEach { (neighbor, match) ->
+         // \v
+         assertMatch(Regex("$neighbor\\v"), "$match\n")
+         assertMatch(Regex("$neighbor\\v$neighbor"), "$match\n$match")
+         assertMatch(Regex("\\v$neighbor"), "\n$match")
+
+         assertMatch(Regex("$neighbor\\v+"), "$match\n\n")
+         assertMatch(Regex("$neighbor\\v+$neighbor"), "$match\n\n$match")
+         assertMatch(Regex("\\v+$neighbor"), "\n\n$match")
+
+         assertMatch(Regex("$neighbor\\v{2,4}"), "$match\n\n\n")
+         assertMatch(Regex("$neighbor\\v{2,4}$neighbor"), "$match\n\n\n$match")
+         assertMatch(Regex("\\v{2,4}$neighbor"), "\n\n\n$match")
+
+         assertMatch(Regex("$neighbor\\v*"), match)
+         assertMatch(Regex("$neighbor\\v*$neighbor"), "$match$match")
+         assertMatch(Regex("\\v*$neighbor"), match)
+
+         // \V
+         assertMatch(Regex("$neighbor\\V"), "$match ")
+         assertMatch(Regex("$neighbor\\V$neighbor"), "$match $match")
+         assertMatch(Regex("\\V$neighbor"), " $match")
+
+         // Two spaces for `+`, mirroring the `\v+` cases above.
+         assertMatch(Regex("$neighbor\\V+"), "$match  ")
+         assertMatch(Regex("$neighbor\\V+$neighbor"), "$match  $match")
+         assertMatch(Regex("\\V+$neighbor"), "  $match")
+
+         // `{2,4}` requires at least two chars, so three spaces are used.
+         assertMatch(Regex("$neighbor\\V{2,4}"), "$match   ")
+         assertMatch(Regex("$neighbor\\V{2,4}$neighbor"), "$match   $match")
+         assertMatch(Regex("\\V{2,4}$neighbor"), "   $match")
+
+         assertMatch(Regex("$neighbor\\V*"), match)
+         assertMatch(Regex("$neighbor\\V*$neighbor"), "$match$match")
+         assertMatch(Regex("\\V*$neighbor"), match)
+     }
+
+     // Backrefs and `or` expression
+     assertMatch(Regex("(\\v)x\\1"), "\nx\n")
+     assertMatch(Regex("\\v|x"), "\n")
+     assertMatch(Regex("\\v|x"), "x")
+
+     assertMatch(Regex("(\\V)\n\\1"), "x\nx")
+     assertMatch(Regex("\\V|\n"), "x")
+     assertMatch(Regex("\\V|\n"), "\n")
+
+     // Boundaries: ^, $, \b, \B
+     assertMatch(Regex("^\\vx"), "\nx")
+     assertNoMatch(Regex("^\\vx"), "x\n")
+     assertMatch(Regex("x\\v$"), "x\n")
+     assertNoMatch(Regex("x\\v$"), "\nx")
+     // \b: word char followed by a non-word char; \B: two adjacent non-word chars.
+     assertMatch(Regex("abc\\b\\v"), "abc\n")
+     assertMatch(Regex("-\\B\\v"), "-\n")
+
+     assertMatch(Regex("^\\V\n"), "x\n")
+     assertNoMatch(Regex("^\\V\n"), "\nx")
+     assertMatch(Regex("\n\\V$"), "\nx")
+     assertNoMatch(Regex("\n\\V$"), "x\n")
+     // \b: word char followed by a space; \B: two adjacent word chars.
+     assertMatch(Regex("abc\\b\\V"), "abc ")
+     assertMatch(Regex("abc\\B\\V"), "abcd")
+ }
+
+ /**
+  * Checks `\h` (horizontal whitespace) and `\H` (its negation): whole-string matching, `find` ranges, combinations
+  * with neighboring constructs and quantifiers, back references, alternation and boundary matchers.
+  */
+ @Test fun testHorizontalWhitespaceChar() {
+     // From Java 8+ `Pattern` doc:
+     // \h - A horizontal whitespace character: [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]
+     // \H - A non-horizontal whitespace character: [^\h]
+
+     val positiveRegex = Regex("\\h")
+     val negativeRegex = Regex("\\H")
+     val horizontalWhitespaces = listOf(" ", "\t", "\u00A0", "\u1680", "\u180e", "\u202f", "\u205f", "\u3000") +
+             ('\u2000'..'\u200a').map(Char::toString)
+     val nonHorizontalWhitespaces = listOf("1", "K", "\n", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}")
+
+     // Smoke tests
+     horizontalWhitespaces.forEach {
+         assertMatch(positiveRegex, it)
+         assertNoMatch(negativeRegex, it)
+
+         assertFind(positiveRegex, "prefix$it", 6..6)
+         assertFind(positiveRegex, "prefix${it}suffix", 6..6)
+         assertFind(positiveRegex, "${it}suffix", 0..0)
+     }
+
+     nonHorizontalWhitespaces.forEach {
+         assertNoMatch(positiveRegex, it)
+         assertMatch(negativeRegex, it)
+
+         // Two leading spaces are required for the match to start at index 2.
+         assertFind(negativeRegex, "  $it", 2 until (2 + it.length))
+         assertFind(negativeRegex, "  $it  ", 2 until (2 + it.length))
+         assertFind(negativeRegex, "$it  ", 0 until it.length)
+     }
+
+     // Test that \h and \H can be a part of a more complex regex.
+     val neighbors = listOf(
+         // regex expression to its match.
+         "x" to "x",
+         " " to " ",
+         "\\n" to "\n",
+         "\\\\" to "\\",
+         "[abc]" to "a",
+         "." to "x",
+         "\\d" to "5",
+         "\\D" to "x",
+         "\\s" to " ",
+         "\\S" to "x",
+         "\\w" to "x",
+         "\\W" to "|",
+         "\\p{Alnum}" to "x",
+         "\\p{Space}" to " ",
+         "\\p{Blank}" to " ",
+         "\\p{Sc}" to "$"
+     ).flatMap { (expression, match) ->
+         // Every neighbor is also checked with the `+` and `{2,4}` quantifiers applied to it.
+         listOf(
+             expression to match,
+             "$expression+" to match.repeat(2),
+             "$expression{2,4}" to match.repeat(3)
+         )
+     }
+
+     neighbors.forEach { (neighbor, match) ->
+         // \h
+         assertMatch(Regex("$neighbor\\h"), "$match ")
+         assertMatch(Regex("$neighbor\\h$neighbor"), "$match $match")
+         assertMatch(Regex("\\h$neighbor"), " $match")
+
+         // Two spaces for `+`, mirroring the `\H+` cases below.
+         assertMatch(Regex("$neighbor\\h+"), "$match  ")
+         assertMatch(Regex("$neighbor\\h+$neighbor"), "$match  $match")
+         assertMatch(Regex("\\h+$neighbor"), "  $match")
+
+         // `{2,4}` requires at least two chars, so three spaces are used.
+         assertMatch(Regex("$neighbor\\h{2,4}"), "$match   ")
+         assertMatch(Regex("$neighbor\\h{2,4}$neighbor"), "$match   $match")
+         assertMatch(Regex("\\h{2,4}$neighbor"), "   $match")
+
+         assertMatch(Regex("$neighbor\\h*"), match)
+         assertMatch(Regex("$neighbor\\h*$neighbor"), "$match$match")
+         assertMatch(Regex("\\h*$neighbor"), match)
+
+         // \H
+         assertMatch(Regex("$neighbor\\H"), "$match\n")
+         assertMatch(Regex("$neighbor\\H$neighbor"), "$match\n$match")
+         assertMatch(Regex("\\H$neighbor"), "\n$match")
+
+         assertMatch(Regex("$neighbor\\H+"), "$match\n\n")
+         assertMatch(Regex("$neighbor\\H+$neighbor"), "$match\n\n$match")
+         assertMatch(Regex("\\H+$neighbor"), "\n\n$match")
+
+         assertMatch(Regex("$neighbor\\H{2,4}"), "$match\n\n\n")
+         assertMatch(Regex("$neighbor\\H{2,4}$neighbor"), "$match\n\n\n$match")
+         assertMatch(Regex("\\H{2,4}$neighbor"), "\n\n\n$match")
+
+         assertMatch(Regex("$neighbor\\H*"), match)
+         assertMatch(Regex("$neighbor\\H*$neighbor"), "$match$match")
+         assertMatch(Regex("\\H*$neighbor"), match)
+     }
+
+     // Backrefs and `or` expression
+     assertMatch(Regex("(\\h)x\\1"), " x ")
+     assertMatch(Regex("\\h|x"), " ")
+     assertMatch(Regex("\\h|x"), "x")
+
+     assertMatch(Regex("(\\H) \\1"), "x x")
+     assertMatch(Regex("\\H| "), "x")
+     assertMatch(Regex("\\H| "), " ")
+
+     // Boundaries: ^, $, \b, \B
+     assertMatch(Regex("^\\hx"), " x")
+     assertNoMatch(Regex("^\\hx"), "x ")
+     assertMatch(Regex("x\\h$"), "x ")
+     assertNoMatch(Regex("x\\h$"), " x")
+     // \b: word char followed by a space; \B: two adjacent non-word chars.
+     assertMatch(Regex("abc\\b\\h"), "abc ")
+     assertMatch(Regex("-\\B\\h"), "- ")
+
+     assertMatch(Regex("^\\H "), "x ")
+     assertNoMatch(Regex("^\\H "), " x")
+     assertMatch(Regex(" \\H$"), " x")
+     assertNoMatch(Regex(" \\H$"), "x ")
+     // \b: word char followed by a non-word char; \B: two adjacent word chars.
+     assertMatch(Regex("abc\\b\\H"), "abc\n")
+     assertMatch(Regex("abc\\B\\H"), "abcd")
+ }
+
+ /**
+  * Checks the `\R` linebreak matcher: whole-string matching, `find` ranges, the two ways `\r\n` can be consumed,
+  * combinations with neighboring constructs and quantifiers, back references, alternation and boundary matchers.
+  */
+ @Test fun testUnicodeLinebreakChar() {
+     // From Java 8+ `Pattern` doc:
+     // \R - Any Unicode linebreak sequence, is equivalent to \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029]
+
+     val regex = Regex("\\R")
+     val linebreaks = listOf("\u000D\u000A", "\u000A", "\u000B", "\u000C", "\u000D", "\u0085", "\u2028", "\u2029")
+     val nonLinebreaks = listOf("1", "K", " ", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}")
+
+     // Smoke tests
+     linebreaks.forEach {
+         assertMatch(regex, it)
+
+         // `\r\n` is two chars long, so the expected range depends on the linebreak length.
+         assertFind(regex, "prefix$it", 6 until (6 + it.length))
+         assertFind(regex, "prefix${it}suffix", 6 until (6 + it.length))
+         assertFind(regex, "${it}suffix", 0 until it.length)
+     }
+
+     nonLinebreaks.forEach {
+         assertNoMatch(regex, it)
+     }
+
+     // Test that \r\n matches both \R and \R\R
+     assertMatch(Regex("\\R\\R\\R"), "\r\r\n\n")
+     assertMatch(Regex("\\R\\R\\R\\R"), "\r\r\n\n")
+
+     // Test that \R can be a part of a more complex regex.
+     val neighbors = listOf(
+         // regex expression to its match.
+         "x" to "x",
+         " " to " ",
+         "\\n" to "\n",
+         "\\\\" to "\\",
+         "[abc]" to "a",
+         "." to "x",
+         "\\d" to "5",
+         "\\D" to "x",
+         "\\s" to " ",
+         "\\S" to "x",
+         "\\w" to "x",
+         "\\W" to "|",
+         "\\p{Alnum}" to "x",
+         "\\p{Space}" to " ",
+         "\\p{Blank}" to " ",
+         "\\p{Sc}" to "$"
+     ).flatMap { (expression, match) ->
+         // Every neighbor is also checked with the `+` and `{2,4}` quantifiers applied to it.
+         listOf(
+             expression to match,
+             "$expression+" to match.repeat(2),
+             "$expression{2,4}" to match.repeat(3)
+         )
+     }
+
+     neighbors.forEach { (neighbor, match) ->
+         assertMatch(Regex("$neighbor\\R"), "$match\n")
+         assertMatch(Regex("$neighbor\\R$neighbor"), "$match\n$match")
+         assertMatch(Regex("\\R$neighbor"), "\n$match")
+
+         assertMatch(Regex("$neighbor\\R+"), "$match\n\n")
+         assertMatch(Regex("$neighbor\\R+$neighbor"), "$match\n\n$match")
+         assertMatch(Regex("\\R+$neighbor"), "\n\n$match")
+
+         assertMatch(Regex("$neighbor\\R{2,4}"), "$match\n\n\n")
+         assertMatch(Regex("$neighbor\\R{2,4}$neighbor"), "$match\n\n\n$match")
+         assertMatch(Regex("\\R{2,4}$neighbor"), "\n\n\n$match")
+
+         assertMatch(Regex("$neighbor\\R*"), match)
+         assertMatch(Regex("$neighbor\\R*$neighbor"), "$match$match")
+         assertMatch(Regex("\\R*$neighbor"), match)
+     }
+
+     // Backrefs and `or` expression
+     assertMatch(Regex("(\\R)x\\1"), "\nx\n")
+     assertMatch(Regex("\\R|x"), "\n")
+     assertMatch(Regex("\\R|x"), "x")
+
+     // Boundaries: ^, $, \b, \B
+     assertMatch(Regex("^\\Rx"), "\nx")
+     assertNoMatch(Regex("^\\Rx"), "x\n")
+     assertMatch(Regex("x\\R$"), "x\n")
+     assertNoMatch(Regex("x\\R$"), "\nx")
+     // \b: word char followed by a non-word char; \B: two adjacent non-word chars.
+     assertMatch(Regex("abc\\b\\R"), "abc\n")
+     assertMatch(Regex("-\\B\\R"), "-\n")
+ }
}