import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Read the sentiment CSV (UTF-8 encoded) from the current directory.
df = pd.read_csv("./earphone_sentiment.csv", encoding="utf-8")
df.head()  # preview of the first rows; only rendered when it ends a notebook cell

import jieba

# Segment every entry of the `content` column with jieba. jieba.cut yields
# tokens lazily, so each result is materialised into a list per row.
words = df.content.apply(lambda sentence: list(jieba.cut(sentence)))
# Strip punctuation, English letters and anything else that is not a Chinese character.
def clean_text(value):
    """Return only the Chinese characters of *value*, concatenated in order.

    Every run of characters in the range U+4E00-U+9FFF is kept; everything
    else (punctuation, Latin letters, digits, whitespace, ...) is dropped.

    Args:
        value: Text to clean. Non-string values, e.g. ``None`` or a float
            ``NaN`` standing in for a missing cell, are treated as having
            no text.

    Returns:
        The cleaned string, or ``None`` when *value* is not a string or
        contains no characters in the kept range.
    """
    import re  # function-local so the block does not rely on a file-level import

    # NaN is truthy, so a plain truthiness check would pass a float on to
    # re.findall and raise TypeError; check the type explicitly instead.
    if not isinstance(value, str):
        return None

    text = "".join(re.findall(r"[\u4e00-\u9fff]+", value))
    # An empty result means nothing survived the filter; report it as None.
    return text or None