下面的代码参考了网上的一些例子,并做了一些完善。
# -*- coding: UTF-8 -*-
import re
# Sample text mixing HTML markup, ASCII words and Chinese words.
test = u'<h1>hello 你好, world 世界</h1>'
# Matches one or more consecutive characters in the range U+4E00..U+9FA5.
chinese_pattern = u'[\u4e00-\u9fa5]+'
says = re.findall(chinese_pattern, test)
# Print each extracted run on its own line.
for say in says:
    print(say)
# str.join puts the comma only between items, so no trailing separator
# has to be stripped afterwards.
hi = ','.join(says)
# Prints: 你好,世界
print(hi)
def Find(string):
    """Return every http/https URL prefix found in *string*.

    The pattern matches the scheme followed by a run of word characters,
    hyphens, dots and percent-encoded bytes (``%XX``), so a match stops at
    the first character outside that set (for example ``/``, ``?`` or ``:``).

    Args:
        string: Text to scan.

    Returns:
        A list of the matched substrings in order of appearance; an empty
        list when nothing matches.
    """
    # Raw string so that \w and \d reach the regex engine unchanged instead
    # of being treated as (invalid) Python string escapes.
    url = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', string)
    return url
# Demo input: a sentence containing two URLs separated by a full-width comma.
string = (
    'Runoob 的网页地址为:https://www.runoob.com,'
    'Google 的网页地址为:https://www.google.com'
)
# Show the list of URLs that Find() extracts from the sentence.
print("Urls: ", Find(string))