在正则表达式里,$本来代表字符串的结尾,但是如果使用了RegexOptions.Multiline选项的话,它的含义就变成了任意一行的结尾.
我们知道,在MS平台上,行与行之间的分隔符是\r\n,先回车,再换行,那么这个$到底是在\r的前面,还是在\r的后面,还是在\n的后面?再者,如果字符串里包含了没有连续成对出现的\r和\n,那么这些字符的前后能不能匹配$?
using
System.Text.RegularExpressions;
using NUnit.Framework;
/// <summary>
/// Pins down where the <c>$</c> anchor matches when
/// <see cref="RegexOptions.Multiline"/> is set, using inputs whose lines are
/// separated by CRLF, by a lone CR, and by a lone LF.
/// </summary>
[TestFixture]
public class RegexTest
{
    // Line breaks are written as explicit escapes so the fixtures do not depend
    // on the line-ending style this source file is saved with. Literals and
    // patterns carry no padding: a space next to ^ or $ would have to be matched
    // literally and would change every count below.

    // Three CRLF-separated lines, no line break after the last one.
    private readonly string text1 = "String 1\r\nString 2\r\nString 3";

    // Three lines, each terminated by CRLF (including the last).
    private readonly string text2 = "String 1\r\nString 2\r\nString 3\r\n";

    // Three lines, each terminated by a lone CR.
    private readonly string text3 = "String 1\rString 2\rString 3\r";

    // Three lines, each terminated by a lone LF.
    private readonly string text4 = "String 1\nString 2\nString 3\n";

    /// <summary>
    /// Checks whether <c>$</c> matches directly after the digits, i.e. before
    /// any <c>\r</c>.
    /// </summary>
    [Test]
    public void BeforeReturn()
    {
        Regex r = new Regex(@"^String \d+$", RegexOptions.Multiline);

        // Only the last line of text1 has nothing between the digit and the end
        // of the string; the other lines have a \r in the way.
        Assert.AreEqual(1, MatchCount(r, text1));
        Assert.AreEqual("String 3", r.Match(text1).Value);
        Assert.AreEqual(0, MatchCount(r, text2));
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(3, MatchCount(r, text4));
    }

    /// <summary>
    /// Checks whether <c>$</c> matches between <c>\r</c> and <c>\n</c>.
    /// </summary>
    [Test]
    public void BeforeNextLine()
    {
        Regex r = new Regex(@"^String \d+\r$", RegexOptions.Multiline);

        // The last line of text1 does not end with \r, so it is not matched.
        Assert.AreEqual(2, MatchCount(r, text1));
        Assert.AreEqual(3, MatchCount(r, text2));
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(0, MatchCount(r, text4));
    }

    /// <summary>
    /// Checks whether <c>$</c> matches after a complete <c>\r\n</c> pair.
    /// </summary>
    [Test]
    public void AfterNextLine()
    {
        Regex r = new Regex(@"^String \d+\r\n$", RegexOptions.Multiline);

        Assert.AreEqual(0, MatchCount(r, text1));
        Assert.AreEqual(1, MatchCount(r, text2));

        // Note: the $ here is really matching the end of the following (empty) line.
        Assert.AreEqual("String 3\r\n", r.Match(text2).Value);
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(0, MatchCount(r, text4));
    }

    /// <summary>
    /// A <c>\n</c> in first position still has a <c>$</c> position in front of it.
    /// </summary>
    [Test]
    public void FirstCharNextLine()
    {
        string s = "\nabc";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // Even when \n is the very first character, $ matches before it
        // (plus once more at the end of the string).
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// A <c>\n</c> in last position still has a <c>$</c> position after it.
    /// </summary>
    [Test]
    public void LastCharNextLine()
    {
        string s = "abc\n";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // Even when the last character is \n, $ also matches after it.
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// A string consisting of a single <c>\n</c> yields two <c>$</c> positions.
    /// </summary>
    [Test]
    public void OnlyNextLine()
    {
        string s = "\n";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // One match before the \n and one after it.
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// The empty string yields exactly one <c>$</c> position.
    /// </summary>
    [Test]
    public void Nothing()
    {
        Regex r = new Regex("$", RegexOptions.Multiline);

        Assert.AreEqual(1, MatchCount(r, string.Empty));
    }

    /// <summary>
    /// Returns how many matches <paramref name="r"/> finds in <paramref name="s"/>.
    /// </summary>
    /// <param name="r">The regular expression to run.</param>
    /// <param name="s">The input text to search.</param>
    /// <returns>The number of matches reported by <see cref="Regex.Matches(string)"/>.</returns>
    private int MatchCount(Regex r, string s)
    {
        return r.Matches(s).Count;
    }
}
using NUnit.Framework;
// NOTE(review): this file appears to contain an earlier class that is also
// named RegexTest with the same members; two types with the same name in one
// compilation unit will not compile — confirm which copy should be kept.

/// <summary>
/// Tests that record where the <c>$</c> anchor matches under
/// <see cref="RegexOptions.Multiline"/> for CRLF, CR-only and LF-only input.
/// </summary>
[TestFixture]
public class RegexTest
{
    // The fixtures use explicit \r\n escapes instead of verbatim multi-line
    // literals, so the expected counts do not change with the line endings of
    // the source file. No literal or pattern is padded with spaces, because a
    // space inside a pattern or input is significant to the regex engine.

    // "String 1".."String 3" joined by CRLF, without a trailing line break.
    private readonly string text1 = "String 1\r\nString 2\r\nString 3";

    // Same three lines, every one followed by CRLF.
    private readonly string text2 = "String 1\r\nString 2\r\nString 3\r\n";

    // Same three lines, every one followed by a lone CR.
    private readonly string text3 = "String 1\rString 2\rString 3\r";

    // Same three lines, every one followed by a lone LF.
    private readonly string text4 = "String 1\nString 2\nString 3\n";

    /// <summary>
    /// Does <c>$</c> match in front of a <c>\r</c>?
    /// </summary>
    [Test]
    public void BeforeReturn()
    {
        Regex r = new Regex(@"^String \d+$", RegexOptions.Multiline);

        // In text1 only the final line matches: it is followed directly by the
        // end of the string rather than by \r.
        Assert.AreEqual(1, MatchCount(r, text1));
        Assert.AreEqual("String 3", r.Match(text1).Value);
        Assert.AreEqual(0, MatchCount(r, text2));
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(3, MatchCount(r, text4));
    }

    /// <summary>
    /// Does <c>$</c> match after <c>\r</c> but before <c>\n</c>?
    /// </summary>
    [Test]
    public void BeforeNextLine()
    {
        Regex r = new Regex(@"^String \d+\r$", RegexOptions.Multiline);

        // The final line of text1 has no trailing \r, so it is left unmatched.
        Assert.AreEqual(2, MatchCount(r, text1));
        Assert.AreEqual(3, MatchCount(r, text2));
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(0, MatchCount(r, text4));
    }

    /// <summary>
    /// Does <c>$</c> match after the whole <c>\r\n</c> sequence?
    /// </summary>
    [Test]
    public void AfterNextLine()
    {
        Regex r = new Regex(@"^String \d+\r\n$", RegexOptions.Multiline);

        Assert.AreEqual(0, MatchCount(r, text1));
        Assert.AreEqual(1, MatchCount(r, text2));

        // Note: this $ is in fact matched at the end of the next, empty line.
        Assert.AreEqual("String 3\r\n", r.Match(text2).Value);
        Assert.AreEqual(0, MatchCount(r, text3));
        Assert.AreEqual(0, MatchCount(r, text4));
    }

    /// <summary>
    /// Input that starts with <c>\n</c>.
    /// </summary>
    [Test]
    public void FirstCharNextLine()
    {
        string s = "\nabc";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // $ still matches in front of \n when \n is the first character;
        // the second match is at the end of the string.
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// Input that ends with <c>\n</c>.
    /// </summary>
    [Test]
    public void LastCharNextLine()
    {
        string s = "abc\n";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // $ still matches behind \n when \n is the last character.
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// Input that is nothing but <c>\n</c>.
    /// </summary>
    [Test]
    public void OnlyNextLine()
    {
        string s = "\n";
        Regex r = new Regex("$", RegexOptions.Multiline);

        // One position before the \n, one after it.
        Assert.AreEqual(2, MatchCount(r, s));
    }

    /// <summary>
    /// Empty input.
    /// </summary>
    [Test]
    public void Nothing()
    {
        Regex r = new Regex("$", RegexOptions.Multiline);

        Assert.AreEqual(1, MatchCount(r, string.Empty));
    }

    /// <summary>
    /// Counts the matches of <paramref name="r"/> in <paramref name="s"/>.
    /// </summary>
    /// <param name="r">The regular expression to evaluate.</param>
    /// <param name="s">The text to search.</param>
    /// <returns>The <c>Count</c> of the collection returned by <see cref="Regex.Matches(string)"/>.</returns>
    private int MatchCount(Regex r, string s)
    {
        MatchCollection matches = r.Matches(s);
        return matches.Count;
    }
}
结论是: 在Multiline模式下,$只匹配\n之前的位置以及字符串末尾的位置;\r不会被当作行结束符,所以单独的\r前后都不能匹配$.
而在对RegexTester进行调试时,我发现RichTextBox对\r\n的处理十分古怪--对它的Text属性使用文本可视化工具查看,结果是有换行;但是对其调用Contains("\n"),返回的结果居然是false!时间有限,出现这种现象的原因留待以后深究.目前至少证实了, 如果牵扯到换行的话,最好还是直接用代码对正则表达式进行测试.