import pandas as pd
import jieba
import numpy as np
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import svm
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC
def read_data(url) -> pd.DataFrame:
    """Load a UTF-8 encoded CSV file and replace missing values.

    Args:
        url: Location of the CSV data; passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, with every missing value replaced by the
        literal string ``"null"``.
    """
    data = pd.read_csv(url, encoding='utf-8')
    # inplace=True modifies ``data`` itself rather than returning a copy.
    data.fillna("null", inplace=True)
    return data
# Data cleaning
defclean_text(text):
text =str(text)
text = text.replace('\n','')
text = text.replace('<br />',' ')
text = text<