话不多说,直接上代码
import html
import re
import emoji
# Extract the plain text of a rich-text (HTML) string and queue it for saving.
#
# NOTE(review): listRichTestInfo, ticketId, fieldKey and filter_emoji are not
# defined at this point of the snippet; they are presumably provided by the
# surrounding code before these statements run -- confirm against the caller.

# Placeholder input: replace with the rich-text string to process.
content = "你的富文本字符串"

# Decode HTML entities (e.g. "&gt;", "&amp;") into their characters first.
htmlContent = html.unescape(content)

# Lazily match everything from a '>' up to the next '<', i.e. the text that
# sits between two tags.  '.' does not match a newline here, so a text node
# spanning several lines is not captured.
regex = r'\>.*?\<'
listAll = re.findall(regex, htmlContent)

# Every match includes its two delimiters, so a length of 2 means an empty
# '><'.  Keep only matches with content and strip the delimiters.
listToSave = [i[1:-1] for i in listAll if len(i) > 2]
strContent = '\n'.join(listToSave)

# Strip emoji characters from the extracted text.
strContent = filter_emoji(strContent)

# Only record the field when some text is left after filtering.
if strContent:
    listRichTestInfo.append((ticketId, fieldKey, '', strContent))
filter_emoji 函数的实现如下:它把码点在 U+10000 及以上的字符(大多数表情符号都位于这一区间)替换为 restr,默认替换为空字符串。
# Matches any single code point outside the Basic Multilingual Plane
# (U+10000 through U+10FFFF), which is where most emoji are encoded.
_NON_BMP_RE = re.compile('[\U00010000-\U0010ffff]')


def filter_emoji(desstr: str, restr: str = '') -> str:
    """Replace emoji-range characters in *desstr* with *restr*.

    Every code point from U+10000 to U+10FFFF is replaced.  This covers
    more than emoji (any other character in that range is replaced too),
    and characters below U+10000 are left untouched.

    Args:
        desstr: The text to clean.
        restr: Replacement for each matched character; defaults to the
            empty string, i.e. the characters are removed.  It is passed
            to ``re.sub`` as the replacement template, so backslash
            escapes in it are interpreted.

    Returns:
        A copy of *desstr* with every matched character replaced.
    """
    # The original fell back to a surrogate-pair pattern when compiling the
    # wide range raised re.error.  That only happened on narrow Unicode
    # builds, which no longer exist on Python 3.3+, so the fallback is gone.
    return _NON_BMP_RE.sub(restr, desstr)
其实代码就这么多,欢迎大家来沟通交流