转载请注明出处:http://blog.csdn.net/zxsean
因为项目需求,现在需要在 C# 中使用 Python 的正则表达式,最开始采用的方法是 IronPython。
但是在 iOS 上面编译无法通过。好在 IronPython 是一个开源项目,于是拿到代码开始研究。
因为我需要的功能很简单,所以最后的代码也只是判断传入的字符串是否匹配传入的 Python 正则表达式。
解析部分代码直接使用:
/* ****************************************************************************
*
* Copyright (c) Microsoft Corporation.
*
* This source code is subject to terms and conditions of the Microsoft Public
* License. A copy of the license can be found in the License.html file at the
* root of this distribution. If you cannot locate the Microsoft Public
* License, please send an email to dlr@microsoft.com. By using this source
* code in any fashion, you are agreeing to be bound by the terms of the
* Microsoft Public License.
*
* You must not remove this notice, or any other, from this software.
*
* ***************************************************************************/
using System;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Minimal Python-style regular expression matching built on top of
/// <see cref="Regex"/>. A Python pattern is rewritten into an equivalent
/// .NET pattern (named groups, named back-references, inline flags and
/// escape sequences) and then matched with the .NET engine.
/// By ZeaLotSean
/// </summary>
public static class PythonRegex
{
    #region CONSTANTS
    // Flag values accepted by the "flags" parameters below. They are stored as
    // boxed ints, so cast before combining: (int)PythonRegex.IGNORECASE | ...
    public static object IGNORECASE = 0x02;
    public static object LOCALE = 0x04;
    public static object MULTILINE = 0x08;
    public static object DOTALL = 0x10;
    public static object UNICODE = 0x20; // accepted but has no effect on the .NET options
    public static object VERBOSE = 0x40;
    #endregion

    // Letters that may appear in a Python global inline-flag group such as "(?imx)".
    private const string InlineFlagChars = "iLmsux";

    /// <summary>
    /// Compiles a Python regular expression (uses <see cref="RegexOptions.Compiled"/>).
    /// </summary>
    /// <param name="_pattern">Python-syntax pattern.</param>
    /// <returns>A reusable pattern object.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="_pattern"/> is null.</exception>
    /// <exception cref="ArgumentException">The pattern is malformed or uses an unsupported construct.</exception>
    public static Python_Pattern Compile(string _pattern)
    {
        return new Python_Pattern(_pattern, 0, true);
    }

    /// <summary>
    /// Returns true when <paramref name="_str"/> contains a match for the Python
    /// pattern <paramref name="_pattern"/> (search semantics, no flags).
    /// </summary>
    public static bool isMatched(string _pattern, string _str)
    {
        return isMatched(_pattern, _str, 0);
    }

    /// <summary>
    /// Returns true when <paramref name="_str"/> contains a match for the Python
    /// pattern <paramref name="_pattern"/> under the given flags.
    /// </summary>
    /// <param name="_pattern">Python-syntax pattern.</param>
    /// <param name="_str">Text to search.</param>
    /// <param name="flags">Bitwise OR of the flag constants of this class (cast to int).</param>
    public static bool isMatched(string _pattern, string _str, int flags)
    {
        return new Python_Pattern(_pattern, flags).isMatched(_str);
    }

    /// <summary>
    /// Compiled reg-ex pattern
    /// </summary>
    public class Python_Pattern
    {
        internal Regex m_re;
        internal ParsedRegex m_pre;

        public Python_Pattern(string pattern)
            : this(pattern, 0)
        {
        }

        public Python_Pattern(string pattern, int flags)
            : this(pattern, flags, false)
        {
        }

        /// <summary>
        /// Translates <paramref name="pattern"/> and builds the underlying <see cref="Regex"/>.
        /// </summary>
        /// <param name="pattern">Python-syntax pattern.</param>
        /// <param name="flags">Bitwise OR of the PythonRegex flag constants (cast to int).</param>
        /// <param name="compiled">When true, <see cref="RegexOptions.Compiled"/> is added.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
        /// <exception cref="ArgumentException">The pattern is malformed or unsupported.</exception>
        public Python_Pattern(string pattern, int flags, bool compiled)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            m_pre = PreParseRegex(pattern);

            RegexOptions opts = FlagsToOption(flags);

            // Merge the global inline flags that were stripped from the pattern text.
            opts |= m_pre.Options & ~RegexOptions.CultureInvariant;
            if ((m_pre.Options & RegexOptions.CultureInvariant) == 0)
            {
                // "(?L)" was present: fall back to culture-sensitive matching.
                opts &= ~RegexOptions.CultureInvariant;
            }

            if (compiled)
            {
                opts |= RegexOptions.Compiled;
            }

            this.m_re = new Regex(m_pre.Pattern, opts);
        }

        /// <summary>
        /// Searches <paramref name="_str"/> for a match of this pattern.
        /// </summary>
        /// <param name="_str">Text to search.</param>
        /// <returns>True when a match is found anywhere in the text.</returns>
        public bool isMatched(string _str)
        {
            return m_re.Match(_str).Success;
        }

        /// <summary>
        /// The original (untranslated) pattern text supplied by the caller.
        /// </summary>
        public string Pattern
        {
            get
            {
                return m_pre.UserPattern;
            }
        }
    }

    /// <summary>
    /// Maps the integer flag bits of this class onto <see cref="RegexOptions"/>.
    /// Matching is culture-invariant unless LOCALE is requested.
    /// </summary>
    private static RegexOptions FlagsToOption(int flags)
    {
        RegexOptions opts = RegexOptions.CultureInvariant;
        if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
        if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
        if ((flags & (int)LOCALE) != 0) opts &= ~RegexOptions.CultureInvariant;
        if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
        if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;
        return opts;
    }

    /// <summary>
    /// Result of translating a pattern: the caller's text, the rewritten .NET
    /// text, and the options collected from global inline flag groups.
    /// </summary>
    internal class ParsedRegex
    {
        public ParsedRegex(string pattern)
        {
            this.UserPattern = pattern;
        }

        public string UserPattern;
        public string Pattern;
        public RegexOptions Options = RegexOptions.CultureInvariant;
    }

    /// <summary>
    /// Preparses a regular expression text returning a ParsedRegex class
    /// that can be used for further regular expressions.
    /// Takes a Python regular expression and returns its .NET-compatible form.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// The pattern ends inside a "(?" construct, has an unterminated "(?P=name",
    /// or uses an extension this translator does not support (e.g. "(?(id)yes|no)").
    /// </exception>
    private static ParsedRegex PreParseRegex(string pattern)
    {
        ParsedRegex res = new ParsedRegex(pattern);
        string rewritten = RewriteGroups(pattern, res);
        res.Pattern = RewriteEscapes(rewritten);
        return res;
    }

    /// <summary>
    /// Rewrites group syntax: "(?P&lt;n&gt;" becomes "(?&lt;n&gt;", "(?P=n)" becomes "\k&lt;n&gt;",
    /// global inline flag groups are removed and recorded in <paramref name="res"/>, and
    /// unnamed groups following a named group receive a generated name.
    /// </summary>
    private static string RewriteGroups(string pattern, ParsedRegex res)
    {
        // NOTE(review): a '(' inside a character class such as "[(]" is still
        // treated as a group opener here, exactly as before.
        int cur = 0;
        int generatedNames = 0;
        bool containsNamedGroup = false;

        for (; ; )
        {
            int groupStart = pattern.IndexOf('(', cur);
            if (groupStart == -1 || groupStart == pattern.Length - 1)
            {
                break;
            }

            if (IsEscaped(pattern, groupStart))
            {
                // "\(" is a literal parenthesis, not a group.
                cur = groupStart + 1;
                continue;
            }

            int next = groupStart + 1;
            if (pattern[next] != '?')
            {
                // Plain capturing group. Once a named group has been seen, every
                // following unnamed group is named artificially: .NET numbers all
                // unnamed groups before named ones, which would reorder the groups.
                if (containsNamedGroup)
                {
                    string generated = NextGeneratedGroupName(pattern, ref generatedNames);
                    pattern = pattern.Insert(next, "?<" + generated + ">");
                }
                cur = next;
                continue;
            }

            if (next == pattern.Length - 1)
            {
                throw new ArgumentException("unexpected end of pattern after '(?'", "pattern");
            }

            int kind = next + 1;
            switch (pattern[kind])
            {
                case 'P':
                    if (kind + 1 < pattern.Length && pattern[kind + 1] == '=')
                    {
                        // (?P=name): match whatever the named group matched -> \k<name>
                        int nameStart = kind + 2;
                        int close = pattern.IndexOf(')', nameStart);
                        if (close == -1)
                        {
                            throw new ArgumentException("unterminated '(?P=name' back-reference", "pattern");
                        }
                        string name = pattern.Substring(nameStart, close - nameStart);
                        pattern = pattern.Substring(0, groupStart) + "\\k<" + name + ">" + pattern.Substring(close + 1);
                    }
                    else
                    {
                        // (?P<name>...): .NET does not expect the 'P'.
                        containsNamedGroup = true;
                        pattern = pattern.Remove(kind, 1);
                    }
                    cur = kind;
                    break;

                case 'i':
                case 'L':
                case 'm':
                case 's':
                case 'u':
                case 'x':
                    {
                        int flagsEnd = kind;
                        while (flagsEnd < pattern.Length && InlineFlagChars.IndexOf(pattern[flagsEnd]) >= 0)
                        {
                            flagsEnd++;
                        }

                        if (flagsEnd < pattern.Length && pattern[flagsEnd] == ')')
                        {
                            // Pure flag group "(?imx)": record the flags as global
                            // options and drop the group, since .NET rejects the
                            // 'L' and 'u' letters and scopes the others differently.
                            for (int i = kind; i < flagsEnd; i++)
                            {
                                ApplyInlineFlag(res, pattern[i]);
                            }
                            pattern = pattern.Remove(groupStart, flagsEnd - groupStart + 1);
                            cur = groupStart;
                        }
                        else
                        {
                            // Scoped form such as "(?i:...)": left for the .NET engine.
                            cur = kind;
                        }
                        break;
                    }

                case ':': // non-capturing
                case '=': // look ahead assertion
                case '<': // look behind assertion
                case '!': // negative look ahead assertion
                case '#': // inline comment
                    cur = kind;
                    break;

                default:
                    // includes "(?(" yes/no conditional groups, which are not supported
                    throw new ArgumentException("unsupported group construct '(?" + pattern[kind] + "' in pattern", "pattern");
            }
        }

        return pattern;
    }

    /// <summary>
    /// Rewrites escape sequences: escapes understood by .NET are kept, "\Z" becomes
    /// "\z", and a backslash in front of any other word character is dropped.
    /// </summary>
    private static string RewriteEscapes(string pattern)
    {
        int cur = 0;
        for (; ; )
        {
            int escapeIndex = pattern.IndexOf('\\', cur);
            if (escapeIndex == -1 || escapeIndex == pattern.Length - 1)
            {
                break;
            }

            char escaped = pattern[escapeIndex + 1];
            bool unescape = false;

            switch (escaped)
            {
                case 'x':
                case 'u':
                case 'a':
                case 'b':
                case 'e':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                case 'v':
                case 'c':
                case 's':
                case 'W':
                case 'w':
                case 'p':
                case 'P':
                case 'S':
                case 'd':
                case 'D':
                case 'A':
                case 'B':
                case '\\':
                    // known escape sequences (or an escaped backslash), leave escaped.
                    break;
                case 'Z':
                    // Python "\Z" is end-of-string only; that is "\z" in .NET.
                    pattern = pattern.Remove(escapeIndex + 1, 1).Insert(escapeIndex + 1, "z");
                    break;
                case 'k':
                    // Keep only the "\k<name>" back-reference form.
                    unescape = !(escapeIndex + 2 < pattern.Length && pattern[escapeIndex + 2] == '<');
                    break;
                default:
                    unescape = IsWordLikeCategory(Char.GetUnicodeCategory(escaped));
                    break;
            }

            if (unescape)
            {
                pattern = pattern.Remove(escapeIndex, 1);
                cur = escapeIndex + 1; // continue right after the now-bare character
            }
            else
            {
                cur = escapeIndex + 2; // skip the backslash and the escaped character
            }
        }

        return pattern;
    }

    /// <summary>
    /// True when the character at <paramref name="index"/> is preceded by an odd
    /// number of backslashes, i.e. it is itself escaped.
    /// </summary>
    private static bool IsEscaped(string pattern, int index)
    {
        int backslashes = 0;
        for (int i = index - 1; i >= 0 && pattern[i] == '\\'; i--)
        {
            backslashes++;
        }
        return (backslashes & 0x01) != 0;
    }

    /// <summary>
    /// Records one inline flag letter in <paramref name="res"/>. 'u' is accepted
    /// and ignored.
    /// </summary>
    private static void ApplyInlineFlag(ParsedRegex res, char flag)
    {
        switch (flag)
        {
            case 'i': res.Options |= RegexOptions.IgnoreCase; break;
            case 'L': res.Options &= ~RegexOptions.CultureInvariant; break;
            case 'm': res.Options |= RegexOptions.Multiline; break;
            case 's': res.Options |= RegexOptions.Singleline; break;
            case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
            default: break;
        }
    }

    /// <summary>
    /// Categories whose escaped form carries no special meaning and is therefore
    /// unescaped. Decimal digits are excluded: "\1", "\2" ... are group references.
    /// </summary>
    private static bool IsWordLikeCategory(System.Globalization.UnicodeCategory category)
    {
        switch (category)
        {
            case System.Globalization.UnicodeCategory.ModifierLetter:
            case System.Globalization.UnicodeCategory.LowercaseLetter:
            case System.Globalization.UnicodeCategory.UppercaseLetter:
            case System.Globalization.UnicodeCategory.TitlecaseLetter:
            case System.Globalization.UnicodeCategory.OtherLetter:
            case System.Globalization.UnicodeCategory.LetterNumber:
            case System.Globalization.UnicodeCategory.OtherNumber:
            case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Produces a group name of the form "Named&lt;n&gt;" that does not already occur
    /// as "&lt;Named&lt;n&gt;&gt;" anywhere in <paramref name="pattern"/>.
    /// </summary>
    private static string NextGeneratedGroupName(string pattern, ref int counter)
    {
        string name;
        do
        {
            counter++;
            name = "Named" + counter.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
        while (pattern.IndexOf("<" + name + ">", StringComparison.Ordinal) >= 0);
        return name;
    }
}