import math
import os
import pprint
import tempfile
import pathlib
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import apache_beam as beam
import tensorflow_transform as tft
import tensorflow_transform.beam as tft_beam
from tfx_bsl.public import tfxio
from tfx_bsl.coders.example_coder import RecordBatchToExamplesEncoder
2023-06-26 23:55:49.730378: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-26 23:55:50.561378: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-26 23:55:50.561477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-26 23:55:50.561486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
RAW_DATA_FEATURE_SPEC =dict([(name, tf.io.FixedLenFeature([], tf.string))for name in CATEGORICAL_FEATURE_KEYS]+[(name, tf.io.FixedLenFeature([], tf.float32))for name in NUMERIC_FEATURE_KEYS]+[(LABEL_KEY, tf.io.FixedLenFeature([], tf.string))])#如下,使用手动feature_spec获得Schema,或者也可通过tfdv.infer_schema自动推断
SCHEMA = tft.DatasetMetadata.from_feature_spec(RAW_DATA_FEATURE_SPEC).schema
2023-06-26 23:55:59.068527: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-26 23:55:59.099139: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-26 23:55:59.099171: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-06-26 23:55:59.099783: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
{'age': <tf.Tensor: shape=(), dtype=float32, numpy=39.0>,
'capital-gain': <tf.Tensor: shape=(), dtype=float32, numpy=2174.0>,
'capital-loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
'education': <tf.Tensor: shape=(), dtype=string, numpy=b' Bachelors'>,
'education-num': <tf.Tensor: shape=(), dtype=float32, numpy=13.0>,
'hours-per-week': <tf.Tensor: shape=(), dtype=float32, numpy=40.0>,
'marital-status': <tf.Tensor: shape=(), dtype=string, numpy=b' Never-married'>,
'native-country': <tf.Tensor: shape=(), dtype=string, numpy=b' United-States'>,
'occupation': <tf.Tensor: shape=(), dtype=string, numpy=b' Adm-clerical'>,
'race': <tf.Tensor: shape=(), dtype=string, numpy=b' White'>,
'relationship': <tf.Tensor: shape=(), dtype=string, numpy=b' Not-in-family'>,
'sex': <tf.Tensor: shape=(), dtype=string, numpy=b' Male'>,
'workclass': <tf.Tensor: shape=(), dtype=string, numpy=b' State-gov'>}
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow_transform/tf_utils.py:324: Tensor.experimental_ref (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use ref() instead.
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow_transform/tf_utils.py:324: Tensor.experimental_ref (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use ref() instead.
INFO:tensorflow:Assets written to: /tmp/tmpg4n8yfpg/tftransform_tmp/631aea06548b4fb38aaa5e855fd97e70/assets
INFO:tensorflow:Assets written to: /tmp/tmpg4n8yfpg/tftransform_tmp/631aea06548b4fb38aaa5e855fd97e70/assets
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_text is not available.
INFO:tensorflow:tensorflow_text is not available.
INFO:tensorflow:Assets written to: /tmp/tmpg4n8yfpg/tftransform_tmp/2c585badca494ed8939abc241f42206a/assets
INFO:tensorflow:Assets written to: /tmp/tmpg4n8yfpg/tftransform_tmp/2c585badca494ed8939abc241f42206a/assets
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_text is not available.
INFO:tensorflow:tensorflow_text is not available.
WARNING:apache_beam.io.tfrecordio:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_text is not available.
INFO:tensorflow:tensorflow_text is not available.
def_make_training_input_fn(tf_transform_output, train_file_pattern,
batch_size):"""An input function reading from transformed data, converting to model input.
Args:
tf_transform_output: Wrapper around output of tf.Transform.
transformed_examples: Base filename of examples.
batch_size: Batch size.
Returns:
The input data for training or eval, in the form of k.
"""definput_fn():return tf.data.experimental.make_batched_features_dataset(#生成字典数据,且label和inputs分开
file_pattern=train_file_pattern,
batch_size=batch_size,
features=tf_transform_output.transformed_feature_spec(),
reader=tf.data.TFRecordDataset,
label_key=LABEL_KEY,
shuffle=True)return input_fn
deftrain_model(model, train_dataset, validation_dataset):
model.compile(optimizer='adam',
loss=tf.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
history = model.fit(train_dataset, validation_data=validation_dataset,
epochs=TRAIN_NUM_EPOCHS,
steps_per_epoch=STEPS_PER_TRAIN_EPOCH,
validation_steps=EVALUATION_STEPS)return history
deftrain_and_evaluate(
model,
working_dir):"""Train the model on training data and evaluate on test data.
Args:
working_dir: The location of the Transform output.
num_train_instances: Number of instances in train set
num_test_instances: Number of instances in test set
Returns:
The results from the estimator's 'evaluate' method
"""
train_dataset = get_dataset(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE)
validation_dataset = get_dataset(working_dir, TRANSFORMED_TEST_DATA_FILEBASE)
model = build_keras_model(working_dir)
history = train_model(model, train_dataset, validation_dataset)
metric_values = model.evaluate(validation_dataset,
steps=EVALUATION_STEPS,
return_dict=True)return model, history, metric_values
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/data/experimental/ops/readers.py:572: ignore_errors (from tensorflow.python.data.experimental.ops.error_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.ignore_errors` instead.
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/data/experimental/ops/readers.py:572: ignore_errors (from tensorflow.python.data.experimental.ops.error_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.ignore_errors` instead.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_text is not available.
INFO:tensorflow:tensorflow_text is not available.