# How to filter rows by the value in a column.
# Load the review spreadsheet once; `pcfr` and `df` are two names for the
# same DataFrame object.
pcfr = df = pd.read_excel('hair.xlsx')
# Keep only the rows whose product_title equals the given string.
# Change the title passed to .eq() to select a different product.
m = df.loc[
    df['product_title'].eq(
        'remington ac2015 t|studio salon collection pearl ceramic hair dryer, deep purple'
    )
]
# Sentiment analysis function.
def s_c_f(df):
# 去重
df.duplicated().value_counts()
# NaN remove
df['review_body'].str.split(expand = True)
# date format convert
'''经常报错,参考这个https://stackoverflow.com/questions/51367393/when-i-use-apply-function-in-pandas-it-shows-typeerror-must-be-string-not-fl
有时候改了好了,换一个表又不行了'''
# df['review_date'] = df.review_date.apply(lambda x : parser.parse(str(x)))
# df['review_date'] = df.review_date.apply(parser.parse)
df['review_date'] = pd.to_datetime(df['review_date'])
#将date设置为index
df=df.set_index('review_date')
## sentiment analysis
# func for polarity
def sentiment_calc(text):
    """Return the TextBlob polarity of ``text``, or None when it cannot be scored.

    Args:
        text: The text to score. Values that are neither ``str`` nor
            ``bytes`` (for example ``None`` or a float NaN) are treated
            as missing and are not passed to TextBlob.

    Returns:
        ``TextBlob(text).sentiment.polarity``, or ``None`` when ``text`` is
        not text or TextBlob raises while scoring it.
    """
    # Handle missing / non-text values explicitly instead of relying on
    # TextBlob raising for them. str and bytes are handed over unchanged.
    if not isinstance(text, (str, bytes)):
        return None
    try:
        return TextBlob(text).sentiment.polarity
    except Exception:
        # Best-effort scoring: an unscorable value yields None. Unlike a
        # bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate, so a long-running apply can still be interrupted.
        return None