最近由于项目需求,尝试各种模型实现多分类。
# coding=gbk
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
# Load the sample table. Passing header=0 together with names makes pandas
# consume the file's first line as a header and replace it with these names.
data = pd.read_csv(
    'data/sample.csv',
    header=0,
    names=['jing', 'ceng', 'y', 'DEPTH', 'AC', 'SP', 'GR', 'CAL', 'RT'],
    encoding='utf-8',
    low_memory=False,
)[1:]
# NOTE(review): the [1:] slice drops one more row after the header line has
# already been consumed -- presumably a second non-data line (e.g. a units
# row); confirm against the file, otherwise the first record is being lost.

# Replace each categorical column with integer codes 0..k-1, numbered in the
# order in which the categories first appear.
categorical_columns = ['jing', 'ceng', 'y']
for f in categorical_columns:
    # Build the code table from the non-missing values only. unique() keeps
    # NaN while nunique() skips it, so sizing the code range with nunique()
    # leaves the table one entry short whenever a column has missing values
    # and silently maps the last real category to NaN. Missing values get no
    # entry here and therefore remain NaN after map().
    data[f] = data[f].map(
        {value: code for code, value in enumerate(data[f].dropna().unique())}
    )

# Row order is left as read; a shuffle(data) call here was commented out.

# Split the encoded label column off from the feature columns; pop() returns
# the column and removes it from the frame in one step.
target = data.pop('y')
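# data: [800000, 8]  target: [800000, 1]
# Reshape data to [batch_size, sequence_length, input_dimension]; the last dimension corresponds to the
# character-embedding dimension of a typical NLP task and is set to 1 here.
# target is converted to [batch_size, output_dimension], here directly via pd.get_dummies.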
train_input, train_outp