import numpy as np
from sklearn.preprocessing import OneHotEncoder
from numpy import *
import MySQLdb
# Fetch every row of the `one_hot` table in the local `ml_test` database.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from configuration or the environment instead.
conn = MySQLdb.connect(host='localhost', user='root', passwd='Zhouy2008', port=3306)
try:
    cursor = conn.cursor()
    try:
        # Select the database to work in.
        conn.select_db('ml_test')
        # Query all rows of the one_hot table.
        sql = "select * from one_hot"
        cursor.execute(sql)
        # The rows are copied into `data` before the cursor is closed below.
        data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Release the connection even when the query above raises.
    conn.close()
# Convert the fetched rows (a tuple of tuples) to a NumPy array and echo
# every row. Single-argument print(...) calls produce the same output under
# both Python 2 and Python 3.
print('now, transfrom data type from tuple to ndarray')
print('data_arr:\n')
data_arr = np.array(data)
for row in data_arr:
    print(row)
# One-hot encoding step: fit the encoder on the rows, then materialize the
# encoded result as a dense array.
enc = OneHotEncoder()
enc.fit(data_arr)
data_hoted = enc.transform(data_arr).toarray()
# Blank separator line (print('') behaves the same on Python 2 and 3).
print('')
# Reuse data_hoted rather than running the transform a second time. The
# label ends with a newline, so the array starts on the following line.
print('enc.transform(all).toarray():\n%s' % (data_hoted,))
# Write data_hoted (the encoded array) to a text file, one array row per
# write.
print('写入文本')
# `with` closes the file even if one of the writes fails.
with open("G:/one_hoted.txt", "w") as f:
    for row in data_hoted:
        # Same text that `print >>f, row` emitted: str(row) plus a newline.
        f.write('%s\n' % (row,))
print('写入文本完成')
# One-hot encoding: Python sklearn CTR experiment
# (Source page note: latest recommended article published 2024-06-26 11:35:37)