Python 通用数据分析:新闻数据
利用结巴(jieba)分词器分析新闻数据。
代码
import numpy as np
import pandas as pd
import jieba
# jieba tokenizer
# Read the news text data and convert it into a list.
# read_table splits on tabs by default; because `names` is supplied, the
# first line of val.txt is read as data rather than as a header row.
df_news = pd.read_table(
    'val.txt',
    names=['category', 'theme', 'URL', 'content'],
    encoding='utf-8',
).dropna()  # discard every row that has a missing value

# Preview the first rows, then the (rows, columns) shape after cleaning.
print(df_news.head(), df_news.shape, sep='\n')

# Pull the article bodies out of the frame as a plain Python list.
content = df_news['content'].values.tolist()
# Show one sample article; requires more than 1000 rows to remain.
print(content[1000])
#进行分词
content_s = []
for line in content