# Input files (read from the working directory):
#   data_财经.csv
#   data_房产.csv
#   data_家居.csv
# -*- coding: utf-8 -*-
import pandas as pd
import os
from sklearn.utils import shuffle
def get_labeled_data(
    sources=('data_财经.csv', 'data_房产.csv', 'data_家居.csv'),
    rows_per_file=5,
    output_path='data1111.csv',
):
    """Write a small, shuffled, whitespace-free sample of the source CSVs.

    Takes the first ``rows_per_file`` rows of every file in ``sources``,
    concatenates them, shuffles the rows, numbers them with a sequential
    ``id`` (0..N-1, assigned after shuffling), strips spaces, newlines,
    tabs and carriage returns from the ``content`` column, and saves the
    result to ``output_path`` with ``id`` as the index column.

    Args:
        sources: Paths of the CSV files to sample from.
        rows_per_file: Number of leading rows taken from each file.
        output_path: Path of the CSV file that is written.

    Returns:
        None. The result is only written to ``output_path``.

    Raises:
        KeyError: If the combined data has no ``content`` column.
    """
    frames = [pd.read_csv(path)[:rows_per_file] for path in sources]
    data = pd.concat(frames)

    # Shuffle the rows; frac=1 returns every row in random order.
    data = data.sample(frac=1)

    # Number the rows after shuffling so ids follow the final order.
    data['id'] = range(len(data))

    # Strip spaces and line-break/tab characters *inside* each text.
    # A single regex pass through the .str accessor is required here:
    # chaining plain Series.replace() only swaps cells whose whole value
    # equals the pattern and leaves embedded whitespace untouched.
    data['content'] = data['content'].str.replace(
        r'[ \n\t\r]', '', regex=True
    )

    data.set_index('id', inplace=True)
    data.to_csv(output_path)
# Build the sample file right away; the call is unguarded, so it also runs
# whenever this module is imported.
get_labeled_data()
# Output file: data1111.csv