以下是一个 Python 正则表达式工具类(RegexUtils)的封装示例,其中提供了多个实例,并且在代码中包含中文注释:
import re
class RegexUtils:
    """Regex-based validators plus text, number, IPv4 and date/time helpers.

    No method reads instance state; the class only serves as a namespace.
    Validators use ``fullmatch`` so that a trailing newline is rejected
    instead of slipping past a ``$`` anchor.
    """

    # Patterns are compiled once at class creation instead of on every call.
    _MOBILE = re.compile(r'1[3-9]\d{9}')
    # The area code (0 + 2-3 digits) is optional, as the method contract says.
    _PHONE = re.compile(r'(?:0\d{2,3}[- ]?)?\d{7,8}')
    # Month is limited to 01-12 and day to 01-31.
    _ID_CARD = re.compile(
        r'[1-9]\d{5}(?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dxX]'
    )
    _EMAIL = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+')
    _IPV4 = re.compile(
        r'(?:(?:25[0-5]|2[0-4]\d|1?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|1?\d{1,2})'
    )
    _DIGITS_ONLY = re.compile(r'\d+')
    _LETTERS_ONLY = re.compile(r'[a-zA-Z]+')
    _ALNUM_ONLY = re.compile(r'[a-zA-Z0-9]+')
    _CHINESE_ONLY = re.compile(r'[\u4e00-\u9fa5]+')
    _CHINESE_CHAR = re.compile(r'[\u4e00-\u9fa5]')
    # "-" is placed last in the path character class: written as "\w- " it is
    # parsed as a range starting at \w, which makes re.compile raise re.error.
    _URL = re.compile(r'https?://(?:[\w-]+\.)+[\w-]+(?:/[\w ./?%&=-]*)?')
    _VALID_URL = re.compile(
        r'(?:ftp|sftp|http|https)://(?:[\w-]+\.)+[\w-]+(?:/[\w ./?%&=-]*)?'
    )
    _PASSWORD = re.compile(
        r'[A-Za-z0-9~!@#$%^&*()_+`\-=\[\]\\{}|;\':",./<>?]{6,20}'
    )
    _WHITESPACE_RUN = re.compile(r'\s+')
    _CAMEL_BOUNDARY = re.compile(r'([a-z]|\d)([A-Z])')
    _UNDERSCORE_CHAR = re.compile(r'_([a-zA-Z0-9])')
    _ANCHOR = re.compile(r'<a href="(.*?)">(.*?)</a>')
    _IMAGE_SRC = re.compile(r'<img src="(.*?)"')
    _MOBILE_PARTS = re.compile(r'^(\d{3})\d{4}(\d{4})$')
    _ID_CARD_PARTS = re.compile(r'^(\d{4})\d{10}(\w{4})$')
    _BANK_CARD_PARTS = re.compile(r'^(\d{4})\d{8}(\d{4})$')

    # Financial ("uppercase") Chinese numerals, indexed by digit value.
    _CN_DIGITS = '零壹贰叁肆伍陆柒捌玖'

    _EMOJI_DESCRIPTIONS = {
        '😃': 'smiling face with open mouth and smiling eyes',
        '😄': 'smiling face with open mouth and closed eyes',
        '😁': 'grinning face with smiling eyes',
        '😆': 'grinning squinting face',
        '😅': 'grinning face with sweat',
        '🤣': 'rolling on the floor laughing',
        '😂': 'face with tears of joy',
        '🙂': 'slightly smiling face',
        '🙃': 'upside-down face',
        '😉': 'winking face',
        '😊': 'smiling face with smiling eyes',
        '😇': 'smiling face with halo',
        '🥰': 'smiling face with hearts',
        '😍': 'smiling face with heart-eyes',
        '🤩': 'star-struck',
        '😘': 'face blowing a kiss',
        '😗': 'kissing face',
        '😚': 'kissing face with closed eyes',
        '😋': 'face savoring food',
        '😛': 'face with tongue',
        '😜': 'winking face with tongue',
        '🤪': 'zany face',
        '😝': 'squinting face with tongue',
        '🤑': 'money-mouth face',
        '🤗': 'hugging face',
        '🤔': 'thinking face',
    }
    _EMOJI = re.compile('|'.join(map(re.escape, _EMOJI_DESCRIPTIONS)))

    # ------------------------------------------------------------------
    # Validators
    # ------------------------------------------------------------------

    # Example 1: mobile number check.
    def is_mobile_number(self, number):
        """Return True if *number* is 11 digits, starting with 1 then 3-9."""
        return bool(self._MOBILE.fullmatch(number))

    # Example 2: landline number check (area code optional).
    def is_phone_number(self, number):
        """Return True for a 7-8 digit landline number.

        An area code (``0`` plus 2-3 digits) may precede it, optionally
        separated by ``-`` or a space.
        """
        return bool(self._PHONE.fullmatch(number))

    # Example 3: 18-character ID card number check.
    def is_id_card(self, id_card):
        """Return True if *id_card* has the shape of an 18-character ID number.

        Only the layout is checked (birth year 19xx/20xx, month 01-12,
        day 01-31, final digit or X); the checksum is not verified.
        """
        return bool(self._ID_CARD.fullmatch(id_card))

    # Example 4: e-mail address check.
    def is_email(self, email):
        """Return True if *email* looks like ``local@domain.tld``."""
        return bool(self._EMAIL.fullmatch(email))

    # Example 5: IPv4 address check.
    def is_ip_address(self, ip_address):
        """Return True if *ip_address* is a dotted quad with octets 0-255."""
        return bool(self._IPV4.fullmatch(ip_address))

    # Example 6: digits only.
    def is_only_number(self, text):
        """Return True if *text* is non-empty and consists only of digits."""
        return bool(self._DIGITS_ONLY.fullmatch(text))

    # Example 7: ASCII letters only.
    def is_only_letter(self, text):
        """Return True if *text* is non-empty and consists only of a-z / A-Z."""
        return bool(self._LETTERS_ONLY.fullmatch(text))

    # Example 8: ASCII letters and digits only.
    def is_number_and_letter(self, text):
        """Return True if *text* is non-empty and consists only of a-z, A-Z, 0-9."""
        return bool(self._ALNUM_ONLY.fullmatch(text))

    # Example 9: Chinese characters only.
    def is_only_chinese(self, text):
        """Return True if every character of *text* is in U+4E00..U+9FA5."""
        return bool(self._CHINESE_ONLY.fullmatch(text))

    # Example 10: contains a Chinese character.
    def is_contain_chinese(self, text):
        """Return True if *text* contains a character in U+4E00..U+9FA5."""
        return bool(self._CHINESE_CHAR.search(text))

    # Example 11: http/https URL check.
    def is_url(self, url):
        """Return True if the whole of *url* is an http(s) URL."""
        return bool(self._URL.fullmatch(url))

    # Example 12: password rule check (6-20 characters, at least two of
    # digits, letters and symbols).
    def is_valid_password(self, password):
        """Return True if *password* satisfies the password rule.

        The rule: 6-20 characters drawn from ASCII letters, digits and the
        allowed symbols, containing at least two of those three kinds.
        """
        if not self._PASSWORD.fullmatch(password):
            return False
        # After the full match every character is a letter, digit or symbol.
        has_digit = any(ch.isdigit() for ch in password)
        has_letter = any(ch.isalpha() for ch in password)
        has_symbol = any(not ch.isalnum() for ch in password)
        return has_digit + has_letter + has_symbol >= 2

    # ------------------------------------------------------------------
    # Text helpers
    # ------------------------------------------------------------------

    # Example 13: add 1 to every number in a string.
    def number_increase(self, text):
        """Return *text* with every run of digits incremented by one.

        Zero padding is preserved: ``'009'`` becomes ``'010'``.
        """
        def bump(match):
            digits = match.group(0)
            return str(int(digits) + 1).rjust(len(digits), '0')

        return self._DIGITS_ONLY.sub(bump, text)

    # Example 14: extract all numbers from a string.
    def extract_numbers(self, text):
        """Return every run of digits in *text* as a list of strings."""
        return self._DIGITS_ONLY.findall(text)

    # Example 15: collapse repeated whitespace.
    def remove_multiple_spaces(self, text):
        """Return *text* with each whitespace run replaced by one space."""
        return self._WHITESPACE_RUN.sub(' ', text)

    # Example 16: camelCase -> snake_case.
    def camel_to_underline(self, text):
        """Convert camelCase *text* to lower-case underscore form."""
        return self._CAMEL_BOUNDARY.sub(r'\1_\2', text).lower()

    # Example 17: snake_case -> camelCase.
    def underline_to_camel(self, text):
        """Convert underscore-separated *text* to camelCase."""
        return self._UNDERSCORE_CHAR.sub(lambda m: m.group(1).upper(), text)

    # ------------------------------------------------------------------
    # Chinese numerals
    # ------------------------------------------------------------------

    @staticmethod
    def _split_decimal(number):
        """Parse *number* into ``(is_negative, integer_value, fraction_digits)``.

        Accepts ints, floats, Decimals and numeric strings.  Raises
        ``ValueError`` if the value is not a finite decimal number.
        """
        from decimal import Decimal, InvalidOperation

        try:
            value = Decimal(str(number).strip())
        except InvalidOperation as exc:
            raise ValueError(f'not a decimal number: {number!r}') from exc
        if not value.is_finite():
            raise ValueError(f'not a finite number: {number!r}')
        # The 'f' format expands any exponent into plain positional digits.
        integer_text, _, fraction_text = format(abs(value), 'f').partition('.')
        return value < 0, int(integer_text), fraction_text

    @classmethod
    def _below_wan(cls, value):
        """Spell 1..9999 with the units 仟/佰/拾, inserting 零 for inner gaps."""
        text = ''
        zero_pending = False
        for unit, base in (('仟', 1000), ('佰', 100), ('拾', 10), ('', 1)):
            digit, value = divmod(value, base)
            if digit:
                if zero_pending:
                    text += '零'
                    zero_pending = False
                text += cls._CN_DIGITS[digit] + unit
            elif text:
                # Emit a single 零 only if a non-zero digit follows.
                zero_pending = True
        return text

    @classmethod
    def _below_yi(cls, value):
        """Spell 1..99_999_999 as an optional 万 section plus a low section."""
        high, low = divmod(value, 10 ** 4)
        text = cls._below_wan(high) + '万' if high else ''
        if low:
            if high and low < 1000:
                text += '零'
            text += cls._below_wan(low)
        return text

    @classmethod
    def _integer_to_chinese(cls, value):
        """Spell a non-negative integer in financial Chinese numerals."""
        if value == 0:
            return '零'
        if value < 10 ** 8:
            return cls._below_yi(value)
        # Everything above 10^8 is spelled recursively and suffixed with 亿.
        high, low = divmod(value, 10 ** 8)
        text = cls._integer_to_chinese(high) + '亿'
        if low:
            if low < 10 ** 7:
                text += '零'
            text += cls._below_yi(low)
        return text

    # Example 18: number -> RMB uppercase form.
    def number_to_rmb(self, number):
        """Return *number* as an RMB amount in financial Chinese numerals.

        Produces the same text as :meth:`amount_to_chinese`, for example
        ``12.5`` -> ``'壹拾贰元伍角'``.
        """
        return self.amount_to_chinese(number)

    # Example 19: number -> uppercase Chinese numerals.
    def number_to_chinese(self, number):
        """Return *number* in financial Chinese numerals.

        The fraction is truncated to two digits and trailing zeros are
        dropped; it is spelled digit by digit after ``点``.  Negative values
        are prefixed with ``负``.  Raises ``ValueError`` for non-numeric input.
        """
        negative, integer, fraction = self._split_decimal(number)
        fraction = fraction[:2].rstrip('0')
        text = self._integer_to_chinese(integer)
        if fraction:
            text += '点' + ''.join(self._CN_DIGITS[int(ch)] for ch in fraction)
        if negative and (integer or fraction):
            text = '负' + text
        return text

    # Example 20: amount -> Chinese uppercase with the units 元/角/分.
    def amount_to_chinese(self, amount):
        """Return *amount* as a Chinese financial amount string.

        The fraction is truncated to 角 and 分.  Whole amounts end in ``整``,
        e.g. ``100`` -> ``'壹佰元整'`` and ``'1.05'`` -> ``'壹元零伍分'``.
        Raises ``ValueError`` for non-numeric input.
        """
        negative, integer, fraction = self._split_decimal(amount)
        padded = (fraction + '00')[:2]
        jiao, fen = int(padded[0]), int(padded[1])
        text = self._integer_to_chinese(integer) + '元'
        if not jiao and not fen:
            text += '整'
        else:
            if jiao:
                text += self._CN_DIGITS[jiao] + '角'
            elif integer:
                # Mark the empty 角 position, as in 壹元零伍分.
                text += '零'
            if fen:
                text += self._CN_DIGITS[fen] + '分'
        if negative and (integer or jiao or fen):
            text = '负' + text
        return text

    # ------------------------------------------------------------------
    # IPv4 helpers
    # ------------------------------------------------------------------

    # Example 21: dotted quad -> integer.
    def ip_to_int(self, ip_address):
        """Return the integer value of dotted-quad *ip_address*.

        The input is not validated; use :meth:`is_ip_address` first.
        """
        parts = ip_address.split('.')
        return (
            int(parts[0]) * 256 ** 3
            + int(parts[1]) * 256 ** 2
            + int(parts[2]) * 256
            + int(parts[3])
        )

    # Example 22: integer -> dotted quad.
    def int_to_ip(self, ip_int):
        """Return the dotted-quad form of the integer *ip_int*."""
        octets = []
        for _ in range(4):
            ip_int, octet = divmod(ip_int, 256)
            octets.append(str(octet))
        # Octets were collected lowest first.
        return '.'.join(reversed(octets))

    # Example 23: limited broadcast address check.
    def is_broadcast_address(self, ip_address):
        """Return True if *ip_address* is 255.255.255.255."""
        return (self.ip_to_int(ip_address) & 0xffffffff) == 0xffffffff

    # Example 24: unicast address check.
    def is_unicast_address(self, ip_address):
        """Return True if *ip_address* is neither multicast nor in 127.0.0.0/8."""
        ip_int = self.ip_to_int(ip_address)
        is_multicast = (ip_int & 0xf0000000) == 0xe0000000
        is_loopback = (ip_int & 0xff000000) == 0x7f000000
        return not is_multicast and not is_loopback

    # Example 25: multicast address check.
    def is_multicast_address(self, ip_address):
        """Return True if *ip_address* lies in 224.0.0.0/4."""
        return (self.ip_to_int(ip_address) & 0xf0000000) == 0xe0000000

    # Example 26: private address check.
    def is_private_address(self, ip_address):
        """Return True if *ip_address* is in 10/8, 172.16/12 or 192.168/16."""
        ip_int = self.ip_to_int(ip_address)
        return (
            (ip_int & 0xff000000) == 0x0a000000
            or (ip_int & 0xfff00000) == 0xac100000
            or (ip_int & 0xffff0000) == 0xc0a80000
        )

    # Example 27: URL check supporting ftp, sftp, http and https.
    def is_valid_url(self, url):
        """Return True if the whole of *url* is an ftp/sftp/http/https URL."""
        return bool(self._VALID_URL.fullmatch(url))

    # ------------------------------------------------------------------
    # HTML, emoji and masking helpers
    # ------------------------------------------------------------------

    # Example 28: add target="_blank" to hyperlinks.
    def add_blank_target(self, text):
        """Add ``target="_blank"`` to each ``<a href="...">...</a>`` in *text*.

        Only anchors whose sole attribute is a double-quoted ``href`` match.
        """
        return self._ANCHOR.sub(r'<a href="\1" target="_blank">\2</a>', text)

    # Example 29: replace emoji with a text description.
    def emoji_to_text(self, text):
        """Replace each known emoji in *text* with its English description."""
        return self._EMOJI.sub(
            lambda m: self._EMOJI_DESCRIPTIONS[m.group()], text
        )

    # Example 30: mask the middle 4 digits of a mobile number.
    def conceal_mobile_number(self, number):
        """Mask digits 4-7 of an 11-digit *number*; other input is unchanged."""
        return self._MOBILE_PARTS.sub(r'\1****\2', number)

    # Example 31: mask the middle 10 digits of an ID card number.
    def conceal_id_card(self, id_card):
        """Mask the middle 10 digits of an 18-character *id_card*."""
        return self._ID_CARD_PARTS.sub(r'\1**********\2', id_card)

    # Example 32: mask the middle 8 digits of a bank card number.
    def conceal_bank_card(self, bank_card):
        """Mask the middle 8 digits of a 16-digit *bank_card*."""
        return self._BANK_CARD_PARTS.sub(r'\1**** ****\2', bank_card)

    # Example 33: extract hyperlinks and their link text.
    def extract_hyperlinks(self, html):
        """Return ``(href, text)`` tuples for each simple anchor in *html*."""
        return self._ANCHOR.findall(html)

    # Example 34: extract image links.
    def extract_image_links(self, text):
        """Return the ``src`` value of every ``<img src="...">`` in *text*."""
        return self._IMAGE_SRC.findall(text)

    # ------------------------------------------------------------------
    # Geography and date/time helpers
    # ------------------------------------------------------------------

    # Example 35: distance between two longitude/latitude points, in metres.
    def calculate_distance(self, lng1, lat1, lng2, lat2):
        """Return the great-circle distance in metres between two points.

        Coordinates are in degrees.  Uses the haversine formula with an
        Earth radius of 6371000 m; the result is rounded to two decimals.
        """
        from math import asin, cos, radians, sin, sqrt

        earth_radius_m = 6371000
        phi1, phi2 = radians(lat1), radians(lat2)
        delta_phi = phi1 - phi2
        delta_lambda = radians(lng1) - radians(lng2)
        haversine = (
            sin(delta_phi / 2) ** 2
            + cos(phi1) * cos(phi2) * sin(delta_lambda / 2) ** 2
        )
        # Clamp to 1.0 so rounding error cannot push asin out of its domain.
        central_angle = 2 * asin(min(1.0, sqrt(haversine)))
        return round(central_angle * earth_radius_m, 2)

    # Example 36: seconds -> HH:MM:SS.
    def format_seconds(self, seconds):
        """Format *seconds* as ``HH:MM:SS``; fractional seconds are dropped."""
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return f'{hours:02d}:{minutes:02d}:{secs:02d}'

    # Example 37: current Unix timestamp.
    def get_now_timestamp(self):
        """Return the current Unix time as an integer number of seconds."""
        import time

        return int(time.time())

    # Example 38: Unix timestamp -> date/time string.
    def timestamp_to_datetime(self, timestamp, format='%Y-%m-%d %H:%M:%S'):
        """Format *timestamp* in local time using the strftime *format*."""
        import time

        return time.strftime(format, time.localtime(timestamp))

    # Example 39: date/time string -> Unix timestamp.
    def datetime_to_timestamp(self, datetime_str, format='%Y-%m-%d %H:%M:%S'):
        """Parse *datetime_str* as local time and return its Unix timestamp."""
        import time

        return int(time.mktime(time.strptime(datetime_str, format)))

    # Example 40: today's date as yyyy-mm-dd.
    def get_now_date(self):
        """Return the current local date as a ``YYYY-MM-DD`` string."""
        import time

        return time.strftime('%Y-%m-%d', time.localtime())

    # Example 41: date string -> date object.
    def str_to_date(self, date_str):
        """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``."""
        from datetime import datetime

        return datetime.strptime(date_str, '%Y-%m-%d').date()

    # Example 42: date object -> yyyy-mm-dd string.
    def date_to_str(self, date):
        """Format *date* as a ``YYYY-MM-DD`` string."""
        return date.strftime('%Y-%m-%d')
以下是另一个 Python 正则表达式工具类的封装示例,其中包含多个实例,并且在代码中包含中文注释。该示例通过一个名为 RegexTool 的类来实现:
import re
class RegexTool:
    """Static regex helpers: validators, finders and thin ``re``/``str`` wrappers.

    Validators use ``re.fullmatch`` so that a trailing newline is rejected
    instead of slipping past a ``$`` anchor.
    """

    # ------------------------------------------------------------------
    # Validators
    # ------------------------------------------------------------------

    # Check whether a string is an integer or floating-point number.
    @staticmethod
    def is_number(s):
        """Return True if *s* is a decimal number, optionally with an exponent."""
        return bool(
            re.fullmatch(r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?', s)
        )

    # Check whether a string is an e-mail address.
    @staticmethod
    def is_email(s):
        """Return True if *s* looks like an e-mail address."""
        return bool(
            re.fullmatch(r'\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*', s)
        )

    # Check whether a string is a mobile number.
    @staticmethod
    def is_mobile_phone(s):
        """Return True if *s* is 11 digits, starting with 1 then 3-9."""
        return bool(re.fullmatch(r'1[3456789]\d{9}', s))

    # Check whether a string is an ID card number.
    @staticmethod
    def is_id_card(s):
        """Return True if *s* has the shape of an 18-character ID number.

        Only the layout is checked (birth year 19xx/20xx, month 01-12,
        day 01-31, final digit or X); the checksum is not verified.
        """
        return bool(
            re.fullmatch(
                r'[1-9]\d{5}(19|20)\d{2}(0[1-9]|1[012])'
                r'(0[1-9]|[12]\d|3[01])\d{3}[xX\d]',
                s,
            )
        )

    # Check whether a string is a date in yyyy-mm-dd format.
    @staticmethod
    def is_date(s):
        """Return True if *s* is ``yyyy-mm-dd`` with month 01-12 and day 01-31.

        Days are not checked against the month length.
        """
        return bool(
            re.fullmatch(r'\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])', s)
        )

    # Check whether a string is a time in hh:mm:ss format.
    @staticmethod
    def is_time(s):
        """Return True if *s* is ``hh:mm:ss`` on a 24-hour clock."""
        return bool(re.fullmatch(r'([01]\d|2[0-3]):([0-5]\d):([0-5]\d)', s))

    # Check whether a string is a date-time in yyyy-mm-dd hh:mm:ss format.
    @staticmethod
    def is_datetime(s):
        """Return True if *s* is ``yyyy-mm-dd hh:mm:ss``.

        The date and time parts are separated by whitespace and bounded
        like :meth:`is_date` and :meth:`is_time`.
        """
        return bool(
            re.fullmatch(
                r'\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])'
                r'\s+([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
                s,
            )
        )

    # ------------------------------------------------------------------
    # Finders
    # ------------------------------------------------------------------

    # Find IP addresses.
    @staticmethod
    def find_ip_address(s):
        """Return every IPv4 dotted quad in *s* whose octets are all 0-255.

        Sequences that are part of a longer dotted or digit run (such as
        ``1.2.3.4.5``) are skipped.
        """
        octet = r'(?:25[0-5]|2[0-4]\d|1?\d{1,2})'
        return re.findall(
            r'(?<![\d.])(?:' + octet + r'\.){3}' + octet + r'(?!\.?\d)', s
        )

    # Find URLs.
    @staticmethod
    def find_url(s):
        """Return every http(s) URL in *s*, each running to the next whitespace."""
        return re.findall(r'(https?://[\S]*)', s)

    # Find HTML tags.
    @staticmethod
    def find_html_tag(s):
        """Return every ``<...>`` tag in *s*."""
        return re.findall(r'<[^>]+>', s)

    # Find hexadecimal colour values.
    @staticmethod
    def find_hex_color(s):
        """Return the hex digits of every 3- or 6-digit colour value in *s*.

        The leading ``#`` is optional and is not part of the result.  A value
        must not be embedded in a longer alphanumeric run.
        """
        return re.findall(
            r'(?<![0-9a-zA-Z#])#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})(?![0-9a-zA-Z])',
            s,
        )

    # Find postal codes.
    @staticmethod
    def find_postcode(s):
        """Return every stand-alone 6-digit code in *s* not starting with 0."""
        return re.findall(r'(?<!\d)[1-9]\d{5}(?!\d)', s)

    # Find Chinese licence plate numbers.
    @staticmethod
    def find_car_plate(s):
        """Return each Chinese character + capital letter + 5 plate characters."""
        return re.findall(r'[\u4e00-\u9fa5]{1}[A-Z]{1}[A-Z_0-9]{5}', s)

    # Find HTML numeric character references.
    @staticmethod
    def find_unicode(s):
        """Return every numeric character reference in *s*.

        Both decimal (``&#20013;``) and hexadecimal (``&#x4E2D;``) forms match.
        """
        return re.findall(r'&#(?:[0-9]+|[xX][0-9a-fA-F]+);', s)

    # Find HTML special-character entities.
    @staticmethod
    def find_special_char(s):
        """Return the names (amp/lt/gt/nbsp) of those entities found in *s*."""
        return re.findall(r'&(amp|lt|gt|nbsp);', s)

    # ------------------------------------------------------------------
    # String helpers
    # ------------------------------------------------------------------

    # Convert Chinese characters to pinyin.
    @staticmethod
    def chinese_to_pinyin(s):
        """Return the result of ``pypinyin.slug(s, separator='')``.

        Requires the third-party ``pypinyin`` package.
        """
        import pypinyin

        return pypinyin.slug(s, separator='')

    # Strip leading and trailing whitespace.
    @staticmethod
    def strip_all(s):
        """Return *s* without leading or trailing whitespace."""
        return s.strip()

    # Strip leading whitespace.
    @staticmethod
    def strip_left(s):
        """Return *s* without leading whitespace."""
        return s.lstrip()

    # Strip trailing whitespace.
    @staticmethod
    def strip_right(s):
        """Return *s* without trailing whitespace."""
        return s.rstrip()

    # Replace text; the search term is a regular expression.
    @staticmethod
    def replace_string(s, old_str, new_str):
        """Return *s* with each match of the regex *old_str* replaced by *new_str*."""
        return re.sub(old_str, new_str, s)

    # Return the first substring that matches a pattern.
    @staticmethod
    def search_string(s, pattern):
        """Return the first match of *pattern* in *s*, or None if there is none."""
        match = re.search(pattern, s)
        return match.group() if match else None

    # Return all substrings that match a pattern.
    @staticmethod
    def find_all_string(s, pattern):
        """Return ``re.findall(pattern, s)``."""
        return re.findall(pattern, s)

    # Split a string.
    @staticmethod
    def split_string(s, sep):
        """Split *s* on every match of the regex *sep*."""
        return re.split(sep, s)

    # Pad a string on the left up to a given length.
    @staticmethod
    def fill_string_left(s, length, fill_char='0'):
        """Left-pad *s* with *fill_char* up to *length*.

        *s* is returned unchanged when it is already long enough.
        """
        missing = length - len(s)
        if missing <= 0:
            return s
        return fill_char * missing + s

    # Pad a string on the right up to a given length.
    @staticmethod
    def fill_string_right(s, length, fill_char='0'):
        """Right-pad *s* with *fill_char* up to *length*.

        *s* is returned unchanged when it is already long enough.
        """
        missing = length - len(s)
        if missing <= 0:
            return s
        return s + fill_char * missing
以上只是该工具类中的一部分示例,实际上还包含其他很多功能。要使用这个工具类,只需将其保存为一个 .py 文件(例如 regex_tool.py),然后在需要使用正则表达式时导入它即可。例如:
from regex_tool import RegexTool # 用法示例
# Validate a sample number and report the result.
if RegexTool.is_mobile_phone('13912345678'):
    print('是手机号码')
else:
    print('不是手机号码')