# -*- coding: utf-8 -*-
import jieba
# Segment the Chinese text in ./nlp_test0.txt with jieba and write the
# space-separated tokens to ./nlp_test1.txt as UTF-8.

# Read raw bytes. Opening in text mode makes Python 3 decode with the
# platform's default codec during f.read(), which raises UnicodeDecodeError
# as soon as the file is not actually stored in that codec.
with open('./nlp_test0.txt', 'rb') as f:
    document = f.read()

# Decode explicitly. UTF-8 is tried first because it is strict (text in a
# legacy encoding almost never validates as UTF-8); 'utf-8-sig' also drops a
# leading BOM so it does not end up as a token. GBK is kept as the fallback
# because that is the encoding the script originally assumed.
try:
    document_decode = document.decode('utf-8-sig')
except UnicodeDecodeError:
    document_decode = document.decode('GBK')

document_cut = jieba.cut(document_decode)
# NOTE: jieba.cut returns a one-shot generator. Iterating it here (for
# example `print(' '.join(document_cut))`) would exhaust it and leave
# `result` below empty; wrap it in list(...) first if it must be used twice.
result = ' '.join(document_cut)
result = result.encode('utf-8')

# `result` is UTF-8 encoded bytes, so the output file must be opened in
# binary mode; writing bytes to a text-mode file raises TypeError.
with open('./nlp_test1.txt', 'wb') as f2:
    f2.write(result)
# No explicit close() calls: each `with` block already closed its file.
UnicodeDecodeError Traceback (most recent call last)
in
4
5 with open('./nlp_test0.txt') as f:
----> 6 document = f.read()
7
8 document_decode = document.decode('GBK')
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 32: illegal multibyte sequence