def unicode_it(html):
    """Decode a ``str`` to unicode using the encoding detected by chardet.

    Anything that is not a ``str`` instance is handed back untouched.
    A ``str`` is passed to ``chardet.detect``; if the reported confidence
    is at least 0.7, the chosen encoding is printed and used to decode
    the input. A "gb2312" detection is switched to "gbk" when
    ``charset_pattern`` finds nothing in the input or its first match is
    "gbk" (compared after lower-casing and stripping).

    Returns the decoded text, the unchanged input when it is not a
    ``str``, or ``None`` when the detection confidence is below 0.7.

    NOTE(review): the original indentation was lost; this layout assumes
    the trailing ``else`` pairs with the ``isinstance`` check -- confirm.
    """
    if not isinstance(html, str):
        return html

    detection = chardet.detect(html)
    if not (detection["confidence"] >= 0.7):
        # Detection is not trusted enough to decode; signal that with None.
        return None

    codec = detection["encoding"]
    if codec.lower() == "gb2312":
        declared = charset_pattern.findall(html)
        # Stay on gb2312 only when the input names some charset other than gbk.
        names_other_charset = declared and declared[0].lower().strip() != "gbk"
        if not names_other_charset:
            codec = "gbk"

    # Parenthesised form emits the same output as the Python 2 print statement.
    print(codec)
    return html.decode(codec)
Python2编码判断Demo
最新推荐文章于 2022-09-08 16:14:25 发布