1、实际转中文函数(一个字一个字转)
def str_to_chinese(var):
    r"""Decode the first escaped UTF-8 character found in ``var``.

    ``var`` is text in which UTF-8 bytes are spelled out as literal ``\xNN``
    escapes (backslash, ``x``, two hex digits), e.g. the twelve characters
    ``\xe5\xa5\xbd``.  The first run of such escapes is located, the length
    of its first character is taken from the UTF-8 lead byte, and every
    occurrence of that escaped character in ``var`` is replaced by the real
    character.

    Args:
        var: Text that may contain ``\xNN`` escapes.

    Returns:
        ``var`` with the first escaped character decoded.  ``var`` is
        returned unchanged when it holds no well-formed escape, when the
        escape run is shorter than the lead byte requires, or when the bytes
        are not valid UTF-8.
    """
    import binascii
    import re

    # Only well-formed escapes count; a stray "\x" typed by a user (not
    # followed by two hex digits) is ordinary text.
    run = re.search(r'(?:\\x[0-9a-fA-F]{2})+', var)
    if run is None:
        return var

    hex_digits = run.group(0).replace('\\x', '')
    lead = int(hex_digits[:2], 16)

    # The UTF-8 lead byte states how many bytes the character occupies.
    if lead < 0x80:
        length = 1
    elif 0xC0 <= lead < 0xE0:
        length = 2
    elif 0xE0 <= lead < 0xF0:
        length = 3
    elif 0xF0 <= lead < 0xF8:
        length = 4
    else:
        # A continuation byte or an invalid lead byte cannot start a character.
        return var

    char_hex = hex_digits[:2 * length]
    if len(char_hex) < 2 * length:
        # The escape run ends before the character is complete.
        return var

    try:
        # unhexlify is used instead of the Python-2-only str.decode('hex').
        char = binascii.unhexlify(char_hex).decode('utf-8')
    except UnicodeDecodeError:
        return var

    # Each escape is four characters long ("\xNN").
    escaped = run.group(0)[:4 * length]
    return var.replace(escaped, char)
2、处理乱码字符串(逐个截取中文字符或 emoji 表情,解码后拼接成完整字符串返回)
def handle_str(content):
    r"""Decode every run of ``\xNN`` escapes in ``content`` as UTF-8 text.

    ``content`` is text in which UTF-8 bytes (Chinese characters, emoji, ...)
    are spelled out as literal ``\xNN`` escapes.  Each maximal run of
    well-formed escapes is converted to bytes and decoded as UTF-8; all other
    text is copied through untouched.

    Args:
        content: Text that may contain ``\xNN`` escapes.

    Returns:
        The text with decodable escape runs replaced by the characters they
        encode.  A run whose bytes are not valid UTF-8 is left as-is, as is
        a malformed escape such as a ``\x`` not followed by two hex digits.
    """
    import binascii
    import re

    def _decode_run(match):
        # Turn one run of escapes into text, or keep it if it is not UTF-8.
        escaped = match.group(0)
        raw = binascii.unhexlify(escaped.replace('\\x', ''))
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            return escaped

    # Matching whole runs avoids assuming every sequence is exactly three or
    # four bytes long, and skips stray "\x" text typed by a user.
    return re.sub(r'(?:\\x[0-9a-fA-F]{2})+', _decode_run, content)
3、emoji 表情存入 MySQL 数据库
MySQL 的 utf8 字符集每个字符最多 3 字节,而 emoji 需要 4 字节,因此要把数据表的字符集改为 utf8mb4:
ALTER TABLE 表名 CONVERT TO CHARACTER SET utf8mb4;
4、更新(当用户输入的内容本身带有 \x 时,以上方法会报错,可改用下面的写法)
# Sample input: UTF-8 bytes written as literal \xNN escapes, plus a
# user-typed "\\xa" whose doubled backslash keeps it from being read as an escape.
a = "hhhh \\xf0\\x9f\\x99\\x82\\\\xa\\xe5\\xa5\\xbd\\xf0\\x9f\\x98\\x80"
# Convert the text to bytes with str.encode; bytes(a) raises TypeError for a
# str on Python 3 because no encoding is given.
a = a.encode('latin1')
# unicode_escape turns each \xNN into the code point U+00NN, latin1 maps those
# code points back to single bytes, and utf-8 decodes the real characters.
b = a.decode('unicode_escape').encode('latin1').decode('utf-8')
print(b)