tensorflow63 《深度学习原理与TensorFlow实战》03 Hello TensorFlow

00 基本信息

《深度学习原理与TensorFlow实战》书中涉及到的代码主要来源于:
A:Tensorflow/TensorflowModel/TFLearn的样例,
B:https://github.com/DeepVisionTeam/TensorFlowBook.git
https://github.com/DeepVisionTeam/TensorFlowBook.git的Titanic中提供了测试需要的数据。

#《深度学习原理与TensorFlow实战》03 Hello TensorFlow
# 书源码地址:https://github.com/DeepVisionTeam/TensorFlowBook.git
# 视频讲座地址:http://edu.csdn.net/course/detail/5222
# win10 Tensorflow-gpu1.2.0 python3.6.1
# CUDA v8.0 cudnn-8.0-windows10-x64-v5.1
# 本地代码位置:D:\git\DeepLearning\TensorFlowBook\Titanic
# 01_tensorflow_basic.py
# 02_tensorflow_advanced.py
# 03_skflow.py (运行有错误,未解决)
# 04_tflearn.py
# csv_to_tfrecords.py
# data_processing.py
# read_from_tfrecords.py

01 csv_to_tfrecords.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/csv_to_tfrecords.py
#! -*- coding:utf-8 -*-

import pandas as pd
import tensorflow as tf


# convert train.csv to train.tfrecords
def transform_to_tfrecord():
    """Convert ``data/train.csv`` into ``train.tfrecords``.

    Every CSV row becomes one serialized ``tf.train.Example`` holding the
    columns Age, Survived, Pclass, Parch, SibSp, Sex and Fare. Sex is
    stored as 1 for ``'male'`` and 0 for anything else. Missing Age/Fare
    values are written exactly as pandas reads them (NaN).
    """
    data = pd.read_csv('data/train.csv')
    tfrecord_file = 'train.tfrecords'

    def int_feature(value):
        # int() converts numpy integers to native ints before they reach
        # the protobuf layer.
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(value)]))

    def float_feature(value):
        # float() keeps NaN as NaN and converts numpy floats to native ones.
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=[float(value)]))

    rows = zip(data['Age'], data['Survived'], data['Pclass'],
               data['Parch'], data['SibSp'], data['Sex'], data['Fare'])

    writer = tf.python_io.TFRecordWriter(tfrecord_file)
    try:
        for age, survived, pclass, parch, sibsp, sex, fare in rows:
            features = tf.train.Features(feature={
                'Age': float_feature(age),
                'Survived': int_feature(survived),
                'Pclass': int_feature(pclass),
                'Parch': int_feature(parch),
                'SibSp': int_feature(sibsp),
                'Sex': int_feature(1 if sex == 'male' else 0),
                'Fare': float_feature(fare),
            })
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
    finally:
        # Close the file even when a row fails to convert.
        writer.close()


if __name__ == '__main__':
    transform_to_tfrecord()

02 data_processing.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/data_processing.py
import os
import re

import pandas as pd
import tensorflow as tf

# Short alias used throughout this module for building paths.
pjoin = os.path.join
# The CSV files are expected in a 'data' directory next to this module.
DATA_DIR = pjoin(os.path.dirname(__file__), 'data')

# Both raw data sets are loaded at import time.
train_data = pd.read_csv(pjoin(DATA_DIR, 'train.csv'))
test_data = pd.read_csv(pjoin(DATA_DIR, 'test.csv'))

# Glossary of the less common titles:
#  Don: an honorific title used in Spain, Portugal, Italy
#  Dona: feminine form of Don
#  Mme: Madame, Mrs
#  Mlle: Mademoiselle, Miss
#  Jonkheer (female equivalent: Jonkvrouw) is a Dutch honorific of nobility
# All titles are lower-case, matching what get_title() returns.
HONORABLE_TITLES = ['sir', 'lady', 'don', 'dona', 'countess', 'jonkheer',
                    'major', 'col', 'dr', 'master', 'capt']
NORMAL_TITLES = ['mr', 'ms', 'mrs', 'miss', 'mme', 'mlle', 'rev']
# Combined list; preprocess() uses a title's position in it as the
# numeric 'TitleF' feature, so the order matters.
TITLES = HONORABLE_TITLES + NORMAL_TITLES


def get_title(name):
    """Return the lower-cased honorific contained in a passenger name.

    The title is the first run of ASCII letters that is immediately
    followed by a period, e.g. ``'Braund, Mr. Owen Harris'`` -> ``'mr'``.

    Args:
        name: passenger name string.

    Returns:
        The title in lower case, without the trailing period.

    Raises:
        ValueError: if ``name`` contains no such title.
    """
    # Raw string: '\.' inside a plain literal is an invalid escape sequence.
    title_search = re.search(r'([A-Za-z]+)\.', name)
    if title_search is None:
        raise ValueError('no title found in name: %r' % (name,))
    return title_search.group(1).lower()


def get_family(row):
    """Build a family identifier for one passenger row.

    The surname is the part of ``row['Name']`` before the first comma and
    the family size is ``1 + Parch + SibSp``.

    Returns:
        ``'unknown'`` when the surname is empty, ``'<surname>_<size>'``
        (surname lower-cased) when the family has more than three
        members, and ``'nofamily'`` otherwise.
    """
    surname = row['Name'].split(",")[0]
    if not surname:
        return "unknown"

    members = 1 + row['Parch'] + row['SibSp']
    if members > 3:
        return "{0}_{1}".format(surname.lower(), members)
    return "nofamily"


def get_deck(cabin):
    """Return the deck letter of a cabin value.

    The deck is the first character of ``cabin``; a missing (null) cabin
    yields ``'U'``.
    """
    cabin_missing = pd.isnull(cabin)
    return 'U' if cabin_missing else cabin[:1]


class TitanicDigest(object):
    """Statistics and category lists derived from a Titanic DataFrame.

    The values are used to fill missing fields and to turn categorical
    columns into integer indices.

    Attributes set by the constructor:
        count_by_sex: PassengerId count per Sex.
        mean_age: mean of the Age column.
        mean_age_by_sex: mean Age per Sex.
        mean_fare_by_class: mean Fare per Pclass.
        titles: the module-level TITLES list.
        families: unique get_family() results over all rows.
        decks: unique get_deck() results over the Cabin column.
        embarkments: unique Embarked values (may include NaN).
        embark_mode: the most frequent non-null Embarked value as a
            scalar, or None when the column has no non-null values.
    """

    def __init__(self, dataset):
        self.count_by_sex = dataset.groupby('Sex')['PassengerId'].count()
        self.mean_age = dataset['Age'].mean()
        self.mean_age_by_sex = dataset.groupby("Sex")["Age"].mean()
        self.mean_fare_by_class = dataset.groupby("Pclass")["Fare"].mean()
        self.titles = TITLES
        self.families = dataset.apply(get_family, axis=1).unique().tolist()
        self.decks = dataset["Cabin"].apply(get_deck).unique().tolist()
        self.embarkments = dataset.Embarked.unique().tolist()
        # mode() returns a Series (several entries on a tie). Keep a single
        # scalar so it can be used directly as a fill value and looked up
        # with list.index().
        modes = dataset.Embarked.dropna().mode()
        self.embark_mode = modes.iloc[0] if len(modes) else None


def preprocess(data, digest):
    """Return a cleaned, feature-engineered copy of a Titanic DataFrame.

    Args:
        data: DataFrame with the raw columns Sex, Age, Fare, Pclass,
            Embarked, Cabin, Name, Parch, SibSp and, optionally, Survived.
        digest: object providing ``mean_age``, ``mean_fare_by_class``,
            ``embark_mode``, ``embarkments``, ``decks`` and ``titles``.

    Returns:
        A new DataFrame with missing values filled and the extra columns
        Child, EmbarkedF, Deck, DeckF, Title, TitleF, Honor, Family and,
        when Survived is present, Deceased. The input frame is not
        modified.

    Raises:
        ValueError: from ``list.index`` when an Embarked, Deck or Title
            value is not contained in the corresponding digest list.
    """
    # Work on a copy. Assigning columns on the caller's frame would make a
    # second call see the already-encoded Sex column and map it to all 0.
    data = data.copy()

    # convert ['male', 'female'] values of Sex to [1, 0]
    data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
    # fill empty age field with mean age
    data['Age'] = data['Age'].apply(
        lambda age: digest.mean_age if pd.isnull(age) else age)

    # is child flag: 1 for age <= 15 (computed after the age fill above)
    data['Child'] = data['Age'].apply(lambda age: 1 if age <= 15 else 0)

    # fill fare with mean fare of the passenger's class
    def get_fare_value(row):
        if pd.isnull(row['Fare']):
            return digest.mean_fare_by_class[row['Pclass']]
        else:
            return row['Fare']

    data['Fare'] = data.apply(get_fare_value, axis=1)

    # fill Embarked with the mode, then encode it as its index in the
    # digest's list of known ports
    data['Embarked'] = data['Embarked'].apply(
        lambda e: digest.embark_mode if pd.isnull(e) else e)
    data["EmbarkedF"] = data["Embarked"].apply(digest.embarkments.index)

    # missing cabins become 'U0' so that their deck letter is 'U'
    data['Cabin'] = data['Cabin'].apply(lambda c: 'U0' if pd.isnull(c) else c)

    # Deck: first character of the cabin, plus its index in digest.decks
    data["Deck"] = data["Cabin"].apply(lambda cabin: cabin[0])
    data["DeckF"] = data['Deck'].apply(digest.decks.index)

    # Title extracted from the name, plus its index in digest.titles
    data['Title'] = data['Name'].apply(get_title)
    data['TitleF'] = data['Title'].apply(digest.titles.index)

    # 1 when the title is one of the honorable titles, else 0
    data['Honor'] = data['Title'].apply(
        lambda title: int(title in HONORABLE_TITLES))

    data['Family'] = data.apply(get_family, axis=1)

    # the label column only exists in the training data
    if 'Survived' in data.keys():
        data['Deceased'] = data['Survived'].apply(lambda s: int(not s))
    return data


# Statistics are computed once, at import time, from the raw training set
# and are reused for both the training and the test data.
digest = TitanicDigest(train_data)


def get_train_data():
    """Return the preprocessed training set.

    A copy of the raw frame is handed to ``preprocess`` so the
    module-level ``train_data`` is never modified and repeated calls all
    start from the raw values.
    """
    return preprocess(train_data.copy(), digest)


def get_test_data():
    """Return the preprocessed test set.

    Uses the digest built from the training data; the module-level
    ``test_data`` frame is passed as a copy and stays untouched.
    """
    return preprocess(test_data.copy(), digest)


def _int64_feature(value):
    """Wrap a single value in an int64-list ``tf.train.Feature``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)


def _bytes_feature(value):
    """Wrap a single value in a bytes-list ``tf.train.Feature``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)


def transform_to_tfrecord():
    """Serialize every column of ``train.csv`` into ``data.tfrecords``.

    Both files live in ``DATA_DIR``. Each CSV row becomes one
    ``tf.train.Example`` whose feature keys are the CSV column names:
    integers are stored as int64 lists, other real numbers as float lists
    and strings as UTF-8 encoded bytes lists. A missing value in a text
    column is read by pandas as NaN and is therefore written as a float.

    Raises:
        TypeError: if a cell holds a value of an unsupported type.
    """
    import numbers  # stdlib; only needed by this function

    def to_feature(key, value):
        # pandas yields numpy integers for int columns, which are not
        # instances of the builtin ``int`` on Python 3. The abstract
        # numeric types cover both the builtin and the numpy scalars.
        if isinstance(value, numbers.Integral):
            return tf.train.Feature(
                int64_list=tf.train.Int64List(value=[int(value)]))
        if isinstance(value, numbers.Real):
            return tf.train.Feature(
                float_list=tf.train.FloatList(value=[float(value)]))
        if isinstance(value, str):
            return tf.train.Feature(
                bytes_list=tf.train.BytesList(
                    value=[value.encode(encoding="utf-8")]))
        raise TypeError(
            'unsupported value %r in column %r' % (value, key))

    data = pd.read_csv(pjoin(DATA_DIR, 'train.csv'))
    filepath = pjoin(DATA_DIR, 'data.tfrecords')
    writer = tf.python_io.TFRecordWriter(filepath)
    try:
        for i in range(len(data)):
            feature = {key: to_feature(key, data[key][i])
                       for key in data.keys()}
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
    finally:
        # Close the file even when a row fails to convert.
        writer.close()


if __name__ == '__main__':
    transform_to_tfrecord()

03 read_from_tfrecords.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/read_from_tfrecords.py
#!/usr/bin/env python
# coding=utf-8
import tensorflow as tf

def read_and_decode(train_files, num_threads=2, num_epochs=100,
                    batch_size=10, min_after_dequeue=10):
    """Build a queue-based input pipeline over TFRecord files.

    Args:
        train_files: list of TFRecord file names to read.
        num_threads: number of threads used by the shuffle batch queue.
        num_epochs: how many times the file list is produced.
        batch_size: number of examples per returned batch.
        min_after_dequeue: minimum queue size kept for shuffling.

    Returns:
        ``(features, labels)`` tensors from ``tf.train.shuffle_batch``;
        ``labels`` is the 'Survived' column and ``features`` holds the
        remaining parsed columns cast to float32.
    """
    # read serialized examples from the TFRecord files
    record_reader = tf.TFRecordReader()
    file_queue = tf.train.string_input_producer(
        train_files, num_epochs=num_epochs)
    _, record = record_reader.read(file_queue)

    # scalar feature specification of one example
    feature_spec = {
        'Survived': tf.FixedLenFeature([], tf.int64),
        'Pclass': tf.FixedLenFeature([], tf.int64),
        'Parch': tf.FixedLenFeature([], tf.int64),
        'SibSp': tf.FixedLenFeature([], tf.int64),
        'Sex': tf.FixedLenFeature([], tf.int64),
        'Age': tf.FixedLenFeature([], tf.float32),
        'Fare': tf.FixedLenFeature([], tf.float32)}
    parsed = tf.parse_single_example(record, features=feature_spec)

    # the label is taken out; everything left is cast to float32
    label = parsed.pop('Survived')
    inputs = []
    for tensor in parsed.values():
        inputs.append(tf.cast(tensor, tf.float32))

    # shuffle and batch
    queue_capacity = min_after_dequeue + 3 * batch_size
    batch_features, batch_labels = tf.train.shuffle_batch(
        [inputs, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=queue_capacity,
        min_after_dequeue=min_after_dequeue)
    return batch_features, batch_labels


def train_with_queuerunner():
    """Pull batches from ``train.tfrecords`` until the epoch limit is hit.

    Starts the queue runners, repeatedly evaluates one batch and prints
    the labels of every 100th step. The loop ends when the input queue
    raises ``OutOfRangeError``.
    """
    batch_x, batch_y = read_and_decode(['train.tfrecords'])

    with tf.Session() as sess:
        # local variables are initialized too, next to the global ones
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        init_op.run()

        coordinator = tf.train.Coordinator()
        workers = tf.train.start_queue_runners(sess=sess, coord=coordinator)

        step = 0
        try:
            while not coordinator.should_stop():
                # Run training steps or whatever
                _, label_values = sess.run([batch_x, batch_y])
                if step % 100 == 0:
                    print('step %d:' % step, label_values)
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # When done, ask the threads to stop.
            coordinator.request_stop()
        # Wait for threads to finish.
        coordinator.join(workers)


if __name__ == '__main__':
    train_with_queuerunner()

04 01_tensorflow_basic.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/01_tensorflow_basic.py
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

################################
# Preparing Data
################################

# read data from file
data = pd.read_csv('data/train.csv')

# fill nan values with 0
data = data.fillna(0)
# convert ['male', 'female'] values of Sex to [1, 0]
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
# 'Survived' is the label of one class,
# add 'Deceased' as the other class
data['Deceased'] = data['Survived'].apply(lambda s: 1 - s)

# select features and labels for training
# (.values replaces the deprecated DataFrame.as_matrix(), which newer
# pandas releases no longer provide; both return the same ndarray)
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].values
dataset_Y = data[['Deceased', 'Survived']].values

# split training data and validation set data:
# 20% is held out for validation, with a fixed seed for repeatability
X_train, X_val, y_train, y_val = train_test_split(dataset_X, dataset_Y,
                                                  test_size=0.2,
                                                  random_state=42)

################################
# Constructing Dataflow Graph
################################

# create symbolic variables:
# X takes rows of the 6 selected feature columns,
# y takes the matching 2-column [Deceased, Survived] labels
X = tf.placeholder(tf.float32, shape=[None, 6])
y = tf.placeholder(tf.float32, shape=[None, 2])

# weights and bias are the variables to be trained
# (weights start from random normal values, bias from zeros)
weights = tf.Variable(tf.random_normal([6, 2]), name='weights')
bias = tf.Variable(tf.zeros([2]), name='bias')
# softmax over a single linear layer
y_pred = tf.nn.softmax(tf.matmul(X, weights) + bias)

# Minimise cost using cross entropy
# NOTE: add an epsilon (1e-10) when calculating log(y_pred),
# otherwise the result will be -inf when y_pred is 0
cross_entropy = - tf.reduce_sum(y * tf.log(y_pred + 1e-10),
                                reduction_indices=1)
# average the per-row cross entropy over the fed rows
cost = tf.reduce_mean(cross_entropy)

# use gradient descent optimizer (learning rate 0.001) to minimize cost
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

# calculate accuracy: fraction of rows whose predicted class (argmax)
# equals the labelled class
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

################################
# Training and Evaluating the model
################################

# use session to run the calculation
with tf.Session() as sess:
    # variables have to be initialized before anything else is run
    tf.global_variables_initializer().run()

    # training loop: 10 passes over the training set
    for epoch in range(10):
        total_loss = 0.
        for i in range(len(X_train)):
            # feed a single sample per step (each wrapped in a list to
            # form a batch of one) and run one optimizer update
            feed_dict = {X: [X_train[i]], y: [y_train[i]]}
            _, loss = sess.run([train_op, cost], feed_dict=feed_dict)
            total_loss += loss
        # display the loss summed over all samples of this epoch
        print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))

    # Accuracy calculated by TensorFlow
    accuracy = sess.run(acc_op, feed_dict={X: X_val, y: y_val})
    print("Accuracy on validation set: %.9f" % accuracy)

    # Accuracy calculated by NumPy from the raw predictions,
    # as a cross-check of the TensorFlow value above
    pred = sess.run(y_pred, feed_dict={X: X_val})
    correct = np.equal(np.argmax(pred, 1), np.argmax(y_val, 1))
    numpy_accuracy = np.mean(correct.astype(np.float32))
    print("Accuracy on validation set (numpy): %.9f" % numpy_accuracy)

    # predict on test data, prepared the same way as the training data
    testdata = pd.read_csv('data/test.csv')
    testdata = testdata.fillna(0)
    # convert ['male', 'female'] values of Sex to [1, 0]
    testdata['Sex'] = testdata['Sex'].apply(lambda s: 1 if s == 'male' else 0)
    X_test = testdata[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]
    # the label columns are [Deceased, Survived], so the argmax index
    # (0 or 1) is directly the predicted 'Survived' value
    predictions = np.argmax(sess.run(y_pred, feed_dict={X: X_test}), 1)
    submission = pd.DataFrame({
        "PassengerId": testdata["PassengerId"],
        "Survived": predictions
    })

    # write the submission file into the current working directory
    submission.to_csv("titanic-submission.csv", index=False)
'''
Epoch: 0001, total loss=1332.714233560
Epoch: 0002, total loss=1080.481877883
Epoch: 0003, total loss=1146.676006738
...
Epoch: 0009, total loss=1100.583975340
Epoch: 0010, total loss=1091.225955000
Accuracy on validation set: 0.586592197
Accuracy on validation set (numpy): 0.586592197
'''

05 02_tensorflow_advanced.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/02_tensorflow_advanced.py
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

################################
# Preparing Data
################################

# read data from file
data = pd.read_csv('data/train.csv')

# fill nan values with 0
data = data.fillna(0)
# convert ['male', 'female'] values of Sex to [1, 0]
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
# 'Survived' is the label of one class,
# add 'Deceased' as the other class
data['Deceased'] = data['Survived'].apply(lambda s: 1 - s)

# select features and labels for training
# (.values replaces the deprecated DataFrame.as_matrix(), which newer
# pandas releases no longer provide; both return the same ndarray)
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].values
dataset_Y = data[['Deceased', 'Survived']].values

# split training data and validation set data:
# 20% is held out for validation, with a fixed seed for repeatability
X_train, X_val, y_train, y_val = train_test_split(dataset_X, dataset_Y,
                                                  test_size=0.2,
                                                  random_state=42)

################################
# Constructing Dataflow Graph
################################

# arguments that can be set in command line
# (--epochs and --batch_size, both defaulting to 10)
tf.app.flags.DEFINE_integer('epochs', 10, 'Training epochs')
tf.app.flags.DEFINE_integer('batch_size', 10, 'size of mini-batch')
FLAGS = tf.app.flags.FLAGS

with tf.name_scope('input'):
    # create symbolic variables:
    # X takes rows of the 6 selected feature columns,
    # y_true takes the matching 2-column [Deceased, Survived] labels
    X = tf.placeholder(tf.float32, shape=[None, 6])
    y_true = tf.placeholder(tf.float32, shape=[None, 2])

with tf.name_scope('classifier'):
    # weights and bias are the variables to be trained
    weights = tf.Variable(tf.random_normal([6, 2]))
    bias = tf.Variable(tf.zeros([2]))
    # softmax over a single linear layer
    y_pred = tf.nn.softmax(tf.matmul(X, weights) + bias)

    # add histogram summaries for weights, view on tensorboard
    tf.summary.histogram('weights', weights)
    tf.summary.histogram('bias', bias)

# Minimise cost using cross entropy
# NOTE: add an epsilon (1e-10) when calculating log(y_pred),
# otherwise the result will be -inf when y_pred is 0
with tf.name_scope('cost'):
    cross_entropy = - tf.reduce_sum(y_true * tf.log(y_pred + 1e-10),
                                    reduction_indices=1)
    cost = tf.reduce_mean(cross_entropy)
    # scalar summary so the loss can be plotted on tensorboard
    tf.summary.scalar('loss', cost)

# use gradient descent optimizer (learning rate 0.001) to minimize cost
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

with tf.name_scope('accuracy'):
    # fraction of rows whose predicted class equals the labelled class
    correct_pred = tf.equal(tf.argmax(y_true, 1), tf.argmax(y_pred, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Add scalar summary for accuracy
    tf.summary.scalar('accuracy', acc_op)

# non-trainable counter; the training loop stores the epoch number in it
# so that it is saved with the checkpoint
global_step = tf.Variable(0, name='global_step', trainable=False)
# use saver to save and restore model
saver = tf.train.Saver()

# this variable won't be stored, since it is declared after tf.train.Saver()
non_storable_variable = tf.Variable(777)

# directory for checkpoint files, created on first run
ckpt_dir = './ckpt_dir'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)

################################
# Training the model
################################

# Build the op that updates global_step once, before the session starts.
# Calling global_step.assign(...) inside the loop would add a new op to
# the graph on every epoch.
next_step = tf.placeholder(tf.int32, shape=[], name='next_step')
update_global_step = global_step.assign(next_step)

# use session to run the calculation
with tf.Session() as sess:
    # create a log writer. run 'tensorboard --logdir=./logs'
    writer = tf.summary.FileWriter('./logs', sess.graph)
    merged = tf.summary.merge_all()

    # variables have to be initialized before anything else is run
    tf.global_variables_initializer().run()

    # restore variables from checkpoint if exists
    # (this overwrites the freshly initialized values of the saved variables)
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('Restoring from checkpoint: %s' % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    # global_step holds the number of epochs already completed,
    # so training resumes with the next epoch
    start = global_step.eval()
    # training loop
    for epoch in range(start, start + FLAGS.epochs):
        total_loss = 0.
        for i in range(0, len(X_train), FLAGS.batch_size):
            # train with mini-batch
            feed_dict = {
                X: X_train[i: i + FLAGS.batch_size],
                y_true: y_train[i: i + FLAGS.batch_size]
            }
            _, loss = sess.run([train_op, cost], feed_dict=feed_dict)
            total_loss += loss
        # display loss per epoch (summed over all mini-batches)
        print('Epoch: %04d, loss=%.9f' % (epoch + 1, total_loss))

        # evaluate on the validation set and log the merged summaries
        summary, accuracy = sess.run([merged, acc_op],
                                     feed_dict={X: X_val, y_true: y_val})
        writer.add_summary(summary, epoch)  # Write summary
        print('Accuracy on validation set: %.9f' % accuracy)

        # Record that epoch index `epoch` is finished. Storing epoch + 1
        # (the completed count) keeps a resumed run from repeating the
        # last epoch index.
        sess.run(update_global_step, feed_dict={next_step: epoch + 1})
        saver.save(sess, ckpt_dir + '/logistic.ckpt',
                   global_step=global_step)

    # flush pending summary events to disk
    writer.close()
    print('Training complete!')

################################
# Evaluating on the test set
################################

# restore variables and run prediction in another session
with tf.Session() as sess:
    # restore variables from checkpoint if exists
    # NOTE(review): no initializer is run in this session, so if no
    # checkpoint is found the prediction below presumably fails on
    # uninitialized variables - confirm.
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('Restoring from checkpoint: %s' % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    # predict on test data, prepared the same way as the training data
    testdata = pd.read_csv('data/test.csv')
    testdata = testdata.fillna(0)
    # convert ['male', 'female'] values of Sex to [1, 0]
    testdata['Sex'] = testdata['Sex'].apply(lambda s: 1 if s == 'male' else 0)
    X_test = testdata[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]

    # predict on test set; the label columns are [Deceased, Survived],
    # so the argmax index (0 or 1) is directly the 'Survived' value
    predictions = np.argmax(sess.run(y_pred, feed_dict={X: X_test}), 1)
    submission = pd.DataFrame({
        "PassengerId": testdata["PassengerId"],
        "Survived": predictions
    })

    # write the submission file into the current working directory
    submission.to_csv("titanic-submission.csv", index=False)
'''
Epoch: 0001, loss=615.245625108
Accuracy on validation set: 0.586592138
...
Epoch: 0009, loss=610.188485358
Accuracy on validation set: 0.586592138
Epoch: 0010, loss=609.152617797
Accuracy on validation set: 0.586592197
Training complete!
Restoring from checkpoint: ./ckpt_dir\logistic.ckpt-9
# 生成以下文件
# .\titanic-submission.csv
# .\logs\events.out.tfevents.1498818459.LFY_P50
# .\ckpt_dir\logistic.ckpt*
# .\ckpt_dir\checkpoint
'''

06 03_skflow.py

这个没运行成功,有警告和错误

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/03_skflow.py
import pandas as pd
import tensorflow.contrib.learn as skflow
from sklearn import metrics
from sklearn.model_selection import train_test_split

from data_processing import get_test_data, get_train_data

# feature columns used for both training and prediction
FEATURE_COLUMNS = ['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
                   'EmbarkedF', 'DeckF', 'TitleF', 'Honor']

train_data = get_train_data()
# .values replaces the deprecated DataFrame.as_matrix()
X = train_data[FEATURE_COLUMNS].values
Y = train_data['Survived']

# split training data and validation set data (10% held out)
X_train, X_val, Y_train, Y_val = (
    train_test_split(X, Y, test_size=0.1, random_state=42))

# skflow classifier
feature_cols = skflow.infer_real_valued_columns_from_input(X_train)
classifier = skflow.LinearClassifier(feature_columns=feature_cols, n_classes=2)
classifier.fit(X_train, Y_train, steps=200)
# predict() yields its results lazily (a generator), which sklearn's
# accuracy_score rejects with "Expected sequence or array-like";
# materialize the predictions into a list first.
val_predictions = list(classifier.predict(X_val))
score = metrics.accuracy_score(Y_val, val_predictions)
print("Accuracy: %f" % score)

# predict on test dataset
test_data = get_test_data()
X = test_data[FEATURE_COLUMNS].values
# materialized for the same reason as above, so the DataFrame column
# receives concrete values
predictions = list(classifier.predict(X))
submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": predictions
})
submission.to_csv("titanic-submission.csv", index=False)
'''
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\soft\AppData\Local\Temp\tmpc5meq6vi
WARNING:tensorflow:From D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py:20: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py:20: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\head.py:625: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:347: calling LinearClassifier.predict (from tensorflow.contrib.learn.python.learn.estimators.linear) with outputs=None is deprecated and will be removed after 2017-03-01.
Instructions for updating:
Please switch to predict_classes, or set `outputs` argument.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\linear.py:565: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
2017-06-30 17:57:09.918373: I d:\git\deeplearning\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: Quadro M2000M, pci bus id: 0000:01:00.0)
Traceback (most recent call last):
  File "D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py", line 21, in <module>
    score = metrics.accuracy_score(Y_val, classifier.predict(X_val))
  File "C:\Python36\lib\site-packages\sklearn\metrics\classification.py", line 172, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "C:\Python36\lib\site-packages\sklearn\metrics\classification.py", line 72, in _check_targets
    check_consistent_length(y_true, y_pred)
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 177, in check_consistent_length
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 177, in <listcomp>
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 122, in _num_samples
    type(x))
TypeError: Expected sequence or array-like, got <class 'generator'>
'''

07 04_tflearn.py

# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/04_tflearn.py
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn

from data_processing import get_test_data
from data_processing import get_train_data

# feature columns used for both training and prediction
FEATURE_COLUMNS = ['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
                   'EmbarkedF', 'DeckF', 'TitleF', 'Honor']

train_data = get_train_data()
# .values replaces the deprecated DataFrame.as_matrix()
X = train_data[FEATURE_COLUMNS].values
Y = train_data[['Deceased', 'Survived']].values

# arguments that can be set in command line
tf.app.flags.DEFINE_integer('epochs', 10, 'Training epochs')
FLAGS = tf.app.flags.FLAGS

ckpt_dir = './ckpt_dir'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
ckpt_path = os.path.join(ckpt_dir, 'model.ckpt')

# define model: two hidden ReLU layers of 100 units and a 2-way softmax
n_features = X.shape[1]
# named input_layer so the builtin input() is not shadowed
input_layer = tflearn.input_data([None, n_features])
network = tflearn.layers.fully_connected(input_layer, 100, activation='relu')
network = tflearn.layers.fully_connected(network, 100, activation='relu')
y_pred = tflearn.layers.fully_connected(network, 2, activation='softmax')
net = tflearn.regression(y_pred)
model = tflearn.DNN(net)

# restore model if there is a checkpoint
# The saver may write 'model.ckpt.index' / '.meta' / '.data-*' files
# rather than a file literally named 'model.ckpt', so accept either.
if os.path.isfile(ckpt_path) or os.path.isfile(ckpt_path + '.index'):
    model.load(ckpt_path)

# train model, holding out 10% of the samples for validation
model.fit(X, Y, validation_set=0.1, n_epoch=FLAGS.epochs)

# save the trained model
model.save(ckpt_path)

metric = model.evaluate(X, Y)
print('Accuracy on train set: %.9f' % metric[0])

# predict on test dataset
test_data = get_test_data()
X = test_data[FEATURE_COLUMNS].values
# the label columns are [Deceased, Survived], so the argmax index
# (0 or 1) is directly the predicted 'Survived' value
predictions = np.argmax(model.predict(X), 1)

submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": predictions
})

submission.to_csv("titanic-submission.csv", index=False)
'''
---------------------------------
Run id: 0P7SI9
Log directory: /tmp/tflearn_logs/
---------------------------------
Training samples: 801
Validation samples: 90
--
Training Step: 1  | time: 0.295s
| Adam | epoch: 001 | loss: 0.00000 -- iter: 064/801
Training Step: 2  | total loss: 0.62647 | time: 0.299s
| Adam | epoch: 001 | loss: 0.62647 -- iter: 128/801
...
Training Step: 129  | total loss: 0.53789 | time: 0.052s
| Adam | epoch: 010 | loss: 0.53789 -- iter: 768/801
Training Step: 130  | total loss: 0.53268 | time: 1.059s
| Adam | epoch: 010 | loss: 0.53268 | val_loss: 0.49138 -- iter: 801/801
--
Accuracy on train set: 0.790123456
'''
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值