TFX完整流水线示例以及补充（下）

起名大废废

已于 2023-06-27 13:01:34 修改

阅读量366

点赞数

分类专栏： tfx 文章标签： python tensorflow 机器学习

于 2023-06-27 12:35:29 首次发布

本文链接：https://blog.csdn.net/T_eddy/article/details/131414626

版权

tfx 专栏收录该内容

7 篇文章 0 订阅

订阅专栏

Trainer

_taxi_trainer_module_file = 'taxi_trainer.py'

%%writefile {_taxi_trainer_module_file}

from typing import Dict, List, Text

import os
import glob
from absl import logging

import datetime
import tensorflow as tf
import tensorflow_transform as tft

from tfx import v1 as tfx
from tfx_bsl.public import tfxio
from tensorflow_transform import TFTransformOutput

# Imported files such as taxi_constants are normally cached, so changes are
# not honored after the first import.  Normally this is good for efficiency, but
# during development when we may be iterating code it can be a problem. To
# avoid this problem during development, reload the file.
import taxi_constants
import sys
if 'google.colab' in sys.modules:  # Testing to see if we're doing development
  import importlib
  importlib.reload(taxi_constants)

_LABEL_KEY = taxi_constants.LABEL_KEY

_BATCH_SIZE = 40


def _input_fn(file_pattern: List[Text],
              data_accessor: tfx.components.DataAccessor,
              tf_transform_output: tft.TFTransformOutput,
              batch_size: int = 200) -> tf.data.Dataset:
  """Generates features and label for tuning/training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    tf_transform_output: A TFTransformOutput.
    batch_size: representing the number of consecutive elements of returned
      dataset to combine in a single batch

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  return data_accessor.tf_dataset_factory(
      file_pattern,
      tfxio.TensorFlowDatasetOptions(
          batch_size=batch_size, label_key=_LABEL_KEY),
      tf_transform_output.transformed_metadata.schema)

def _get_tf_examples_serving_signature(model, tf_transform_output):
  """Returns a serving signature that accepts `tensorflow.Example`."""

  # We need to track the layers in the model in order to save it.
  # TODO(b/162357359): Revise once the bug is resolved.
  model.tft_layer_inference = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')
  ])
  def serve_tf_examples_fn(serialized_tf_example):
    """Returns the output to be used in the serving signature."""
    raw_feature_spec = tf_transform_output.raw_feature_spec()
    # Remove label feature since these will not be present at serving time.
    raw_feature_spec.pop(_LABEL_KEY)
    raw_features = tf.io.parse_example(serialized_tf_example, raw_feature_spec)
    transformed_features = model.tft_layer_inference(raw_features)
    logging.info('serve_transformed_features = %s', transformed_features)

    outputs = model(transformed_features)
    # TODO(b/154085620): Convert the predicted labels from the model using a
    # reverse-lookup (opposite of transform.py).
    return {'outputs': outputs}

  return serve_tf_examples_fn


def _get_transform_features_signature(model, tf_transform_output):
  """Returns a serving signature that applies tf.Transform to features."""

  # We need to track the layers in the model in order to save it.
  # TODO(b/162357359): Revise once the bug is resolved.
  model.tft_layer_eval = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')
  ])
  def transform_features_fn(serialized_tf_example):
    """Returns the transformed_features to be fed as input to evaluator."""
    raw_feature_spec = tf_transform_output.raw_feature_spec()
    raw_features = tf.io.parse_example(serialized_tf_example, raw_feature_spec)
    transformed_features = model.tft_layer_eval(raw_features)
    logging.info('eval_transformed_features = %s', transformed_features)
    return transformed_features

  return transform_features_fn


def export_serving_model(tf_transform_output, model, output_dir):
  """Exports a keras model for serving.
  Args:
    tf_transform_output: Wrapper around output of tf.Transform.
    model: A keras model to export for serving.
    output_dir: A directory where the model will be exported to.
  """
  # The layer has to be saved to the model for keras tracking purpases.
  model.tft_layer = tf_transform_output.transform_features_layer()

  signatures = {
      'serving_default':
          _get_tf_examples_serving_signature(model, tf_transform_output),
      'transform_features':
          _get_transform_features_signature(model, tf_transform_output),
  }

  model.save(output_dir, save_format='tf', signatures=signatures)


def _build_keras_model(tf_transform_output: TFTransformOutput
                       ) -> tf.keras.Model:
  """Creates a DNN Keras model for classifying taxi data.

  Args:
    tf_transform_output: [TFTransformOutput], the outputs from Transform

  Returns:
    A keras Model.
  """
  feature_spec = tf_transform_output.transformed_feature_spec().copy()
  feature_spec.pop(_LABEL_KEY)

  inputs = {}
  for key, spec in feature_spec.items():
    if isinstance(spec, tf.io.VarLenFeature):
      inputs[key] = tf.keras.layers.Input(
          shape=[None], name=key, dtype=spec.dtype, sparse=True)
    elif isinstance(spec, tf.io.FixedLenFeature):
      # TODO(b/208879020): Move into schema such that spec.shape is [1] and not
      # [] for scalars.
      inputs[key] = tf.keras.layers.Input(
          shape=spec.shape or [1], name=key, dtype=spec.dtype)
    else:
      raise ValueError('Spec type is not supported: ', key, spec)

  output = tf.keras.layers.Concatenate()(tf.nest.flatten(inputs))
  output = tf.keras.layers.Dense(100, activation='relu')(output)
  output = tf.keras.layers.Dense(70, activation='relu')(output)
  output = tf.keras.layers.Dense(50, activation='relu')(output)
  output = tf.keras.layers.Dense(20, activation='relu')(output)
  output = tf.keras.layers.Dense(1)(output)
  return tf.keras.Model(inputs=inputs, outputs=output)


# TFX Trainer will call this function.
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor, 
                            tf_transform_output, _BATCH_SIZE)
  eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor, 
                           tf_transform_output, _BATCH_SIZE)

  model = _build_keras_model(tf_transform_output)

  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
      metrics=[tf.keras.metrics.BinaryAccuracy()])

  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback])

  # Export the model.
  export_serving_model(tf_transform_output, model, fn_args.serving_model_dir)

Overwriting taxi_trainer.py

trainer = tfx.components.Trainer(
    module_file=os.path.abspath(_taxi_trainer_module_file),
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=tfx.proto.TrainArgs(num_steps=10000),
    eval_args=tfx.proto.EvalArgs(num_steps=5000))
context.run(trainer, enable_cache=True)

INFO:absl:Generating ephemeral wheel package for '/mnt/c/Users/DELL/jupyter_notebook_code/chicago_taxi_pipeline/taxi_trainer.py' (including modules: ['taxi_constants', 'taxi_trainer', 'taxi_transform']).
INFO:absl:User module package has hash fingerprint version 79f9cdb8dcb0633411b76b3906a3770b749e6c7c16484cb1f26a1a8c7cbf516a.
INFO:absl:Executing: ['/home/xzy/anaconda3/envs/tfx/bin/python', '/tmp/tmptydrvwrp/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmp9d7rg1o9', '--dist-dir', '/tmp/tmpmyq6a9n7']
/home/xzy/anaconda3/envs/tfx/lib/python3.9/site-packages/setuptools/_distutils/cmd.py:66: SetuptoolsDeprecationWarning: setup.py install is deprecated.
!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer, pypa/build or
        other standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
INFO:absl:Successfully built user code wheel distribution at './pipeline_output_root/_wheels/tfx_user_code_Trainer-0.0+79f9cdb8dcb0633411b76b3906a3770b749e6c7c16484cb1f26a1a8c7cbf516a-py3-none-any.whl'; target user module is 'taxi_trainer'.
INFO:absl:Full user module path is 'taxi_trainer@./pipeline_output_root/_wheels/tfx_user_code_Trainer-0.0+79f9cdb8dcb0633411b76b3906a3770b749e6c7c16484cb1f26a1a8c7cbf516a-py3-none-any.whl'
INFO:absl:Running driver for Trainer
INFO:absl:MetadataStore with DB connection initialized


running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying taxi_constants.py -> build/lib
copying taxi_trainer.py -> build/lib
copying taxi_transform.py -> build/lib
installing to /tmp/tmp9d7rg1o9
running install
running install_lib
copying build/lib/taxi_transform.py -> /tmp/tmp9d7rg1o9
copying build/lib/taxi_trainer.py -> /tmp/tmp9d7rg1o9
copying build/lib/taxi_constants.py -> /tmp/tmp9d7rg1o9
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
......省略

ExecutionResult at 0x7f0295ad3790

.execution_id

.component

Artifact of type 'ModelRun' (uri: ./pipeline_output_root/Trainer/model_run/6) at 0x7f0295ad3250

.type	<class 'tfx.types.standard_artifacts.ModelRun'>
.uri	./pipeline_output_root/Trainer/model_run/6

model_run_artifact_dir = trainer.outputs['model_run'].get()[0].uri

%load_ext tensorboard
%tensorboard --logdir {model_run_artifact_dir}

在这里插入图片描述

Evaluator

# Imported files such as taxi_constants are normally cached, so changes are
# not honored after the first import.  Normally this is good for efficiency, but
# during development when we may be iterating code it can be a problem. To
# avoid this problem during development, reload the file.
import taxi_constants
import sys
if 'google.colab' in sys.modules:  # Testing to see if we're doing development
  import importlib
  importlib.reload(taxi_constants)

eval_config = tfma.EvalConfig(
    model_specs=[
        # This assumes a serving model with signature 'serving_default'. If
        # using estimator based EvalSavedModel, add signature_name: 'eval' and
        # remove the label_key.
        tfma.ModelSpec(
            signature_name='serving_default',
            label_key=taxi_constants.LABEL_KEY,
            preprocessing_function_names=['transform_features'],
            )
        ],
    metrics_specs=[
        tfma.MetricsSpec(
            # The metrics added here are in addition to those saved with the
            # model (assuming either a keras model or EvalSavedModel is used).
            # Any metrics added into the saved model (for example using
            # model.compile(..., metrics=[...]), etc) will be computed
            # automatically.
            # To add validation thresholds for metrics saved with the model,
            # add them keyed by metric name to the thresholds map.
            metrics=[
                tfma.MetricConfig(class_name='ExampleCount'),
                tfma.MetricConfig(class_name='BinaryAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.5}),
                      # Change threshold will be ignored if there is no
                      # baseline model resolved from MLMD (first run).
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
            ]
        )
    ],
    slicing_specs=[
        # An empty slice spec means the overall slice, i.e. the whole dataset.
        tfma.SlicingSpec(),
        # Data can be sliced along a feature column. In this case, data is
        # sliced along feature column trip_start_hour.
        tfma.SlicingSpec(
            feature_keys=['trip_start_hour'])
    ])

# Use TFMA to compute a evaluation statistics over features of a model and
# validate them against a baseline.

# The model resolver is only required if performing model validation in addition
# to evaluation. In this case we validate against the latest blessed model. If
# no model has been blessed before (as in this case) the evaluator will make our
# candidate the first blessed model.
model_resolver = tfx.dsl.Resolver(
      strategy_class=tfx.dsl.experimental.LatestBlessedModelStrategy,
      model=tfx.dsl.Channel(type=tfx.types.standard_artifacts.Model),
      model_blessing=tfx.dsl.Channel(
          type=tfx.types.standard_artifacts.ModelBlessing)).with_id(
              'latest_blessed_model_resolver')
context.run(model_resolver, enable_cache=True)

evaluator = tfx.components.Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config)
#输出过长
context.run(evaluator, enable_cache=True)

INFO:absl:Running driver for latest_blessed_model_resolver
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running publisher for latest_blessed_model_resolver
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running driver for Evaluator
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for Evaluator
INFO:absl:udf_utils.get_fn {'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": -1e-10,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "tips",\n      "preprocessing_function_names": [\n        "transform_features"\n      ],\n      "signature_name": "serving_default"\n    }\n  ],\n  "slicing_specs": [\n    {},\n    {\n      "feature_keys": [\n        "trip_start_hour"\n      ]\n    }\n  ]\n}', 'feature_slicing_spec': None, 'fairness_indicator_thresholds': 'null', 'example_splits': 'null', 'module_file': None, 'module_path': None} 'custom_eval_shared_model'
INFO:absl:Request was made to ignore the baseline ModelSpec and any change thresholds. This is likely because a baseline model was not provided: updated_config=
model_specs {
  signature_name: "serving_default"
  label_key: "tips"
  preprocessing_function_names: "transform_features"
}
slicing_specs {
}
slicing_specs {
  feature_keys: "trip_start_hour"
}
metrics_specs {
  metrics {
    class_name: "ExampleCount"
  }
  metrics {
    class_name: "BinaryAccuracy"
    threshold {
      value_threshold {
        lower_bound {
          value: 0.5
        }
      }
    }
  }
}

......省略

ExecutionResult at 0x7f02535d2d90

.execution_id

.component

Artifact of type 'ModelBlessing' (uri: ./pipeline_output_root/Evaluator/blessing/8) at 0x7f02243a7910

.type	<class 'tfx.types.standard_artifacts.ModelBlessing'>
.uri	./pipeline_output_root/Evaluator/blessing/8

context.show(evaluator.outputs['evaluation'])

在这里插入图片描述

SlicingMetricsViewer(config={'weightedExamplesColumn': 'example_count'}, data=[{'slice': 'Overall', 'metrics':…

import tensorflow_model_analysis as tfma

# Get the TFMA output result path and load the result.
PATH_TO_RESULT = evaluator.outputs['evaluation'].get()[0].uri
tfma_result = tfma.load_eval_result(PATH_TO_RESULT)

# Show data sliced along feature column trip_start_hour.
tfma.view.render_slicing_metrics(
    tfma_result, slicing_column='trip_start_hour')

在这里插入图片描述

SlicingMetricsViewer(config={'weightedExamplesColumn': 'example_count'}, data=[{'slice': 'trip_start_hour:19',…

PATH_TO_RESULT = evaluator.outputs['evaluation'].get()[0].uri
print(tfma.load_validation_result(PATH_TO_RESULT))

validation_ok: true
validation_details {
  slicing_details {
    slicing_spec {
    }
    num_matching_slices: 25
  }
}

Pusher

pusher = tfx.components.Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=tfx.proto.PushDestination(
        filesystem=tfx.proto.PushDestination.Filesystem(
            base_directory=_serving_model_dir)))
context.run(pusher, enable_cache=True)

INFO:absl:Running driver for Pusher
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for Pusher
INFO:absl:Model version: 1687834975
INFO:absl:Model written to serving path ./serving_model/taxi_simple/1687834975.
INFO:absl:Model pushed to ./pipeline_output_root/Pusher/pushed_model/9.
INFO:absl:Running publisher for Pusher
INFO:absl:MetadataStore with DB connection initialized

ExecutionResult at 0x7f0253563550

.execution_id

.component

Artifact of type 'PushedModel' (uri: ./pipeline_output_root/Pusher/pushed_model/9) at 0x7f02243a7f70

.type	<class 'tfx.types.standard_artifacts.PushedModel'>
.uri	./pipeline_output_root/Pusher/pushed_model/9

push_uri = pusher.outputs['pushed_model'].get()[0].uri
model = tf.saved_model.load(push_uri)

for item in model.signatures.items():
  pp.pprint(item)

('serving_default',
 <ConcreteFunction signature_wrapper(*, examples) at 0x7F0265217C70>)
('transform_features',
 <ConcreteFunction signature_wrapper(*, examples) at 0x7F02652D0D90>)

pipeline

def _create_pipeline(pipeline_name:str,pipeline_root:str,metadata_path:str):
    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        trainer,
        evaluator,
        pusher
    ]
    return tfx.dsl.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=tfx.orchestration.metadata
        .sqlite_metadata_connection_config(metadata_path),
        components=components)

tfx.orchestration.LocalDagRunner().run(
  _create_pipeline(
      pipeline_name=PIPELINE_NAME,
      pipeline_root=PIPELINE_ROOT,
      metadata_path=METADATA_PATH))

/home/xzy/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/orchestration/pipeline.py:408: UserWarning: Node Evaluator depends on the output of node latest_blessed_model_resolver, but latest_blessed_model_resolver is not included in the components of pipeline. Did you forget to add it?
  warnings.warn(
INFO:absl:Using deployment config:
 executor_specs {
  key: "CsvExampleGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.example_gen.csv_example_gen.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "Evaluator"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.evaluator.executor.Executor"
      }
    }
  }
}
......省略

_serving_model_dir

'./serving_model/taxi_simple'

inspect ServingModel

!saved_model_cli show --dir {_serving_model_dir}/$(ls -1 {_serving_model_dir}|sort -nr|head -1) --tag_set serve --signature_def serving_default

2023-06-27 11:05:35.695269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-27 11:05:35.695379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-27 11:05:35.695409: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
The given SavedModel SignatureDef contains the following input(s):
  inputs['examples'] tensor_info:
      dtype: DT_STRING
      shape: (-1)
      name: serving_default_examples:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['outputs'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 1)
      name: StatefulPartitionedCall_27:0
Method name is: tensorflow/serving/predict

Test ServingModel

model_dirs = [item for item in os.scandir(_serving_model_dir) if item.is_dir()]
model_path = max(model_dirs,key=lambda i:int(i.name)).path
model_path

'./serving_model/taxi_simple/1687835132'

loaded_model = tf.keras.models.load_model(model_path) #输入是经过transform后的字典数据
inference_fn = loaded_model.signatures['serving_default'] #输入是原始examples数据（可不包含label）
transform_fn=loaded_model.signatures['transform_features'] #输入是原始examples数据（一定包含label）

WARNING:tensorflow:Inconsistent references when loading the checkpoint into this object graph. For example, in the saved checkpoint object, `model.layer.weight` and `model.layer_copy.weight` reference the same variable, while in the current object these are two different variables. The referenced variables are:(<keras.saving.legacy.saved_model.load.TensorFlowTransform>TransformFeaturesLayer object at 0x7f026a194c10> and <keras.engine.input_layer.InputLayer object at 0x7f02dd6632e0>).

from typing import List,Union,Optional
from tensorflow_metadata.proto.v0 import schema_pb2
import pandas as pd
import tensorflow_data_validation as tfdv
import numpy as np
def create_example_by_schema_from_dataframe(row:pd.Series,column_names:List[str],schema_or_schemapath:Union[str,schema_pb2.Schema]):
    """
    根据数据原来的Schema信息将输入的一行数据转换为序列化后的example
    input:
        row:类型为pd.Series的一行数据
        column_names:类型为列表，包含需要转换的列名
        schema_or_schemapath:数据的Schema实例或者Schema的路径（需要具体到schema.pbtxt）
    output:
        serialized_example:序列化后的example数据
    """
    features = {}
    if isinstance(schema_or_schemapath,str):
        schema_or_schemapath=tfdv.load_schema_text(schema_or_schemapath)
    for columnName in column_names:
        typeCode = tfdv.get_feature(schema_or_schemapath,columnName).type
        tempvalue = None
        if typeCode == 1: #string
            if pd.isna(row[columnName]):
                tempvalue = b''
            else:
                tempvalue = row[columnName].encode()
            features[columnName] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[tempvalue]))
        elif typeCode == 2: #int
            if pd.isna(row[columnName]):
                tempvalue = 0
            else:
                tempvalue = int(row[columnName])
            features[columnName] = tf.train.Feature(int64_list=tf.train.Int64List(value=[tempvalue]))
        elif typeCode == 3: #float
            if pd.isna(row[columnName]):
                tempvalue = 0.0
            else:
                tempvalue = float(row[columnName])
            features[columnName] = tf.train.Feature(float_list=tf.train.FloatList(value=[tempvalue]))
    example_proto = tf.train.Example(features=tf.train.Features(feature=features))
    return example_proto.SerializeToString()

def convert_from_csv_to_tfrecord(csv_path:str,tfrecord_path:str,
                                 schema_or_schemapath:Optional[Union[str,schema_pb2.Schema]]=None,
                                 infer_schema_bool:bool=False):
    """
    将csv数据文件转换为tfrecord文件，csv文件需要有header。
    input:
        csv_path:csv文件path
        schema_or_schemapath：schema实例或者路径
        infer_schema_bool：是否在转换前立即推断schema，如果为True，则schema_or_schemapath忽略。
    output:
        如果成功,'OK'
    """
    if infer_schema_bool ==False and schema_or_schemapath == None:
        raise Exception('infer_schema_bool or schema_or_schemapath is error!!!')
    data = pd.read_csv('tfx-data/data.csv')
    if infer_schema_bool: #推断Schema
        statistics = tfdv.generate_statistics_from_csv(data)
        schema_or_schemapath=tfdv.infer_schema(statistics)
    ColumnNames = data.columns
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        for i in range(len(data)):
            row = data.iloc[i]
            example=create_example_by_schema_from_dataframe(row,ColumnNames,schema_or_schemapath)
            writer.write(example)
    return 'OK'

import tensorflow_data_validation as tfdv
reload_schema=tfdv.load_schema_text(schema_gen.outputs['schema'].get()[0].uri+'/schema.pbtxt')
data = pd.read_csv('tfx-data/data.csv')
ColumnNames = taxi_constants.NUMERICAL_FEATURES + taxi_constants.BUCKET_FEATURES + \
    taxi_constants.CATEGORICAL_NUMERICAL_FEATURES + taxi_constants.CATEGORICAL_STRING_FEATURES + \
    [taxi_constants.LABEL_KEY]
example=create_example_by_schema_from_dataframe(data.iloc[207],ColumnNames,reload_schema)

inference_fn(tf.constant([example]))['outputs'].numpy() #输入可以不用包含label，因为这是推断

array([[11.537613]], dtype=float32)

transform_fn(tf.constant([example])) #输入一定要包含label，因为这是预处理（用于训练）

{'dropoff_census_tract_xf': <tf.Tensor: shape=(1, 216), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>,
 'company_xf': <tf.Tensor: shape=(1, 55), dtype=float32, numpy=
 array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>,
 'dropoff_longitude_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>,
 'dropoff_latitude_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 'trip_miles_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.1588674], dtype=float32)>,
 'trip_start_day_xf': <tf.Tensor: shape=(1, 17), dtype=float32, numpy=
 array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.]], dtype=float32)>,
 'payment_type_xf': <tf.Tensor: shape=(1, 16), dtype=float32, numpy=
 array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       dtype=float32)>,
 'pickup_longitude_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>,
 'tips': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([1])>,
 'trip_seconds_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.54713595], dtype=float32)>,
 'pickup_census_tract_xf': <tf.Tensor: shape=(1, 11), dtype=float32, numpy=array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>,
 'pickup_latitude_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 'trip_start_hour_xf': <tf.Tensor: shape=(1, 34), dtype=float32, numpy=
 array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]], dtype=float32)>,
 'dropoff_community_area_xf': <tf.Tensor: shape=(1, 79), dtype=float32, numpy=
 array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       dtype=float32)>,
 'pickup_community_area_xf': <tf.Tensor: shape=(1, 66), dtype=float32, numpy=
 array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]], dtype=float32)>,
 'trip_start_month_xf': <tf.Tensor: shape=(1, 22), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], dtype=float32)>,
 'fare_xf': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.60789293], dtype=float32)>}

convert_from_csv_to_tfrecord('./tfx-data/data.csv','./tt.tfrecord',schema_gen.outputs['schema'].get()[0].uri+'/schema.pbtxt')

'OK'

Test Train

dataset =tf.data.TFRecordDataset('./tt.tfrecord')

dataset = dataset.batch(32).map(transform_fn)

2023-06-27 11:07:38.139610: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'args_0' with dtype string and shape [?]
	 [[{{node args_0}}]]

def make_input_and_label(Batch): #制作字典形式的输入和标签时，输入时字典，但是标签是张量
    label = Batch.pop('tips')
    return Batch,label

dataset = dataset.map(make_input_and_label)

for i in dataset.take(1):
    print(i)

2023-06-27 11:07:38.314296: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_15' with dtype float and shape [1,9]
	 [[{{node Placeholder/_15}}]]


({'dropoff_census_tract_xf': <tf.Tensor: shape=(32, 216), dtype=float32, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, 'company_xf': <tf.Tensor: shape=(32, 55), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, 'dropoff_longitude_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
       9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 1., 1., 1., 1.],
      dtype=float32)>, 'dropoff_latitude_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 7., 7., 7., 7.],
      dtype=float32)>, 'trip_miles_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([-0.1588674 , -0.1588674 ,  0.53216076, -0.1588674 , -0.1588674 ,
        0.21955279,  0.68572253,  0.6418478 ,  0.15977335, -0.1588674 ,
       -0.1588674 , -0.0491804 ,  0.13180317, -0.00475717, -0.13693   ,
        0.18664669, -0.0694725 , -0.04369606, -0.1588674 ,  0.08847681,
       -0.1588674 , -0.00969308, -0.0897646 , -0.07002094,  0.02101928,
        1.0970489 , -0.1588674 ,  0.4169894 ,  0.03308485, -0.1462534 ,
       -0.09853955, -0.06563345], dtype=float32)>, 'trip_start_day_xf': <tf.Tensor: shape=(32, 17), dtype=float32, numpy=
array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]], dtype=float32)>, 'payment_type_xf': <tf.Tensor: shape=(32, 16), dtype=float32, numpy=
array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)>, 'pickup_longitude_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([9., 9., 3., 0., 0., 0., 0., 0., 0., 8., 0., 0., 0., 8., 0., 0., 4.,
       6., 1., 2., 8., 0., 4., 3., 8., 9., 9., 9., 2., 1., 1., 1.],
      dtype=float32)>, 'trip_seconds_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([-7.1184874e-01, -4.3732741e-01,  5.5094934e-01, -5.4713595e-01,
        2.7642804e-01,  1.9067146e-03,  3.8623658e-01,  1.6490347e+00,
        2.7642804e-01, -7.1184874e-01,  2.4725988e+00, -3.2751888e-01,
        2.2152378e-01,  5.6810979e-02, -6.0204017e-01,  1.2098006e+00,
        5.6810979e-02, -4.9223167e-01, -7.1184874e-01, -1.6280608e-01,
       -7.1184874e-01, -2.1771035e-01, -4.3732741e-01, -2.1771035e-01,
        5.6810979e-02,  2.1980774e+00,  1.7039390e+00,  3.8623658e-01,
        1.1171524e-01, -6.0204017e-01, -3.8242313e-01, -2.7261460e-01],
      dtype=float32)>, 'pickup_census_tract_xf': <tf.Tensor: shape=(32, 11), dtype=float32, numpy=
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>, 'pickup_latitude_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0., 0., 0., 9., 9., 9., 8., 0., 9., 0., 7., 3., 9., 0., 3., 7., 7.,
       0., 9., 8., 0., 9., 7., 6., 0., 0., 0., 0., 8., 6., 7., 7.],
      dtype=float32)>, 'trip_start_hour_xf': <tf.Tensor: shape=(32, 34), dtype=float32, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, 'dropoff_community_area_xf': <tf.Tensor: shape=(32, 79), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, 'pickup_community_area_xf': <tf.Tensor: shape=(32, 66), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, 'trip_start_month_xf': <tf.Tensor: shape=(32, 22), dtype=float32, numpy=
array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.]], dtype=float32)>, 'fare_xf': <tf.Tensor: shape=(32,), dtype=float32, numpy=
array([ 0.0610606 , -0.9546066 ,  1.2521242 , -0.4773654 ,  0.40369532,
        0.38737947,  1.6600226 ,  2.1821327 ,  0.24053593, -0.6894726 ,
        2.932666  , -0.3957857 ,  0.22422   , -0.15104656, -0.70578855,
        0.74633014, -0.29789004, -0.3957857 , -0.9546066 ,  0.04474468,
       -0.6894726 , -0.23262626, -0.4773654 , -0.36315382, -0.08578287,
        2.867402  ,  1.855814  ,  0.9910692 ,  0.04474468, -0.6405248 ,
       -0.46104944, -0.36315382], dtype=float32)>}, <tf.Tensor: shape=(32,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1])>)

#计算有几个实例（在batch后，所以这里计算几个batch）
dataset.reduce(0, lambda x, _: x + 1).numpy()

2023-06-27 11:07:38.782656: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_9' with dtype float
	 [[{{node Placeholder/_9}}]]





469

eval_dataset = dataset.take(100)
train_dataset = dataset.skip(100)

loaded_model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
      metrics=[tf.keras.metrics.BinaryAccuracy()])
loaded_model.fit(train_dataset,epochs=10,validation_data=eval_dataset)

2023-06-27 11:07:39.767286: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_31' with dtype int64
	 [[{{node Placeholder/_31}}]]


Epoch 1/10
    355/Unknown - 2s 3ms/step - loss: 0.1946 - binary_accuracy: 0.9396

2023-06-27 11:07:42.644332: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_47' with dtype int64
	 [[{{node Placeholder/_47}}]]


369/369 [==============================] - 3s 6ms/step - loss: 0.1922 - binary_accuracy: 0.9401 - val_loss: 0.1629 - val_binary_accuracy: 0.9459
Epoch 2/10
369/369 [==============================] - 2s 4ms/step - loss: 0.1210 - binary_accuracy: 0.9553 - val_loss: 0.1560 - val_binary_accuracy: 0.9444
Epoch 3/10
369/369 [==============================] - 2s 4ms/step - loss: 0.1022 - binary_accuracy: 0.9611 - val_loss: 0.1599 - val_binary_accuracy: 0.9403
Epoch 4/10
369/369 [==============================] - 2s 4ms/step - loss: 0.0879 - binary_accuracy: 0.9659 - val_loss: 0.1878 - val_binary_accuracy: 0.9303
Epoch 5/10
369/369 [==============================] - 2s 5ms/step - loss: 0.0770 - binary_accuracy: 0.9712 - val_loss: 0.2153 - val_binary_accuracy: 0.9241
Epoch 6/10
369/369 [==============================] - 2s 5ms/step - loss: 0.0761 - binary_accuracy: 0.9709 - val_loss: 0.2313 - val_binary_accuracy: 0.9141
Epoch 7/10
369/369 [==============================] - 2s 4ms/step - loss: 0.0772 - binary_accuracy: 0.9692 - val_loss: 0.2275 - val_binary_accuracy: 0.9287
Epoch 8/10
369/369 [==============================] - 2s 4ms/step - loss: 0.0667 - binary_accuracy: 0.9742 - val_loss: 0.2536 - val_binary_accuracy: 0.9228
Epoch 9/10
369/369 [==============================] - 2s 4ms/step - loss: 0.0495 - binary_accuracy: 0.9818 - val_loss: 0.2533 - val_binary_accuracy: 0.9278
Epoch 10/10
369/369 [==============================] - 2s 4ms/step - loss: 0.0428 - binary_accuracy: 0.9848 - val_loss: 0.3007 - val_binary_accuracy: 0.9197





<keras.callbacks.History at 0x7f02980733a0>

Test TFMA

# from google.protobuf import text_format
# import tensorflow_model_analysis as tfma
# import os
# keras_eval_config = text_format.Parse("""
#     model_specs {
#         signature_name: "serving_default"
#         label_key: "tips"
#         preprocessing_function_names: "transform_features"
#     }
#     metrics_specs {
#         metrics {
#             class_name: "ExampleCount"
#         }
#         metrics {
#             class_name: "Calibration"
#         }
#         metrics {
#             class_name: "CalibrationPlot"
#         }
#         metrics {
#             class_name: "ConfusionMatrixPlot"
#         }
#         metrics {
#             class_name: "FairnessIndicators"
#             config: '{"thresholds":[0.1, 0.3, 0.5, 0.7, 0.9]}'
#         }
#         metrics {
#             class_name: "AUC"
#             threshold {
#                 value_threshold {
#                     lower_bound {
#                         value: 0.5
#                     }
#                 }
#             }
#         }
#     }
#     slicing_specs {}
#     slicing_specs {
#         feature_keys: ["trip_start_hour"]
#     }
#     slicing_specs {
#         feature_keys: ["payment_type"]
#     }
#     options {
#         compute_confidence_intervals { value: False }
#         disabled_outputs { values: "analysis" }
#     }
# """,tfma.EvalConfig())

# _serving_model_dir = os.path.join(
#     '.', 'serving_model/taxi_simple')
# model_dirs = [item for item in os.scandir(_serving_model_dir) if item.is_dir()]
# model_path = max(model_dirs,key=lambda i:int(i.name)).path

# keras_eval_shared_model = tfma.default_eval_shared_model(
#     eval_saved_model_path=model_path,
#     eval_config=keras_eval_config
# )

# keras_output_path = os.path.join('./evalresult','keras')
# keras_eval_result = tfma.run_model_analysis(
#     eval_shared_model=keras_eval_shared_model,
#     eval_config=keras_eval_config,
#     data_location='./tt.tfrecord',
#     output_path=keras_output_path
# )




#使用类
from google.protobuf.wrappers_pb2 import BoolValue

keras_eval_config = tfma.EvalConfig(
    model_specs=[
        # This assumes a serving model with signature 'serving_default'. If
        # using estimator based EvalSavedModel, add signature_name: 'eval' and 
        # remove the label_key.
        tfma.ModelSpec(label_key="tips",signature_name='serving_default',
                       preprocessing_function_names=['transform_features'])
    ],
    metrics_specs=[
        tfma.MetricsSpec(
            # The metrics added here are in addition to those saved with the
            # model (assuming either a keras model or EvalSavedModel is used).
            # Any metrics added into the saved model (for example using
            # model.compile(..., metrics=[...]), etc) will be computed
            # automatically.
            # To add validation thresholds for metrics saved with the model,
            # add them keyed by metric name to the thresholds map.
            metrics=[
                tfma.MetricConfig(class_name='ExampleCount'),
                tfma.MetricConfig(class_name='Calibration'),
                tfma.MetricConfig(class_name='CalibrationPlot'),
                tfma.MetricConfig(class_name='ConfusionMatrixPlot'),
                tfma.MetricConfig(class_name='FairnessIndicators',
                                  config='{ "thresholds": [0.1, 0.3, 0.5, 0.7, 0.9] }'),
                tfma.MetricConfig(class_name='AUC',
                                 threshold=tfma.MetricThreshold(
                                 value_threshold=tfma.GenericValueThreshold(
                                     lower_bound={'value':0.5})))
            ]
        )
    ]
    +tfma.metrics.default_binary_classification_specs() #默认的指标
    ,
    slicing_specs=[
        # An empty slice spec means the overall slice, i.e. the whole dataset.
        tfma.SlicingSpec(),
        tfma.SlicingSpec(feature_keys=['trip_start_hour']),
        tfma.SlicingSpec(feature_keys=['payment_type'])
    ],
    options = tfma.Options(compute_confidence_intervals=BoolValue(value=False))
)
#与上面一样
_serving_model_dir = os.path.join(
    '.', 'serving_model/taxi_simple')
model_dirs = [item for item in os.scandir(_serving_model_dir) if item.is_dir()]
model_path = max(model_dirs,key=lambda i:int(i.name)).path

keras_eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path=model_path,
    eval_config=keras_eval_config
)

keras_output_path = os.path.join('./evalresult','keras')
keras_eval_result = tfma.run_model_analysis(
    eval_shared_model=keras_eval_shared_model,
    eval_config=keras_eval_config,
    data_location='./tt.tfrecord',
    output_path=keras_output_path
)

WARNING:tensorflow:Inconsistent references when loading the checkpoint into this object graph. For example, in the saved checkpoint object, `model.layer.weight` and `model.layer_copy.weight` reference the same variable, while in the current object these are two different variables. The referenced variables are:(<keras.saving.legacy.saved_model.load.TensorFlowTransform>TransformFeaturesLayer object at 0x7f026f1bfb20> and <keras.engine.input_layer.InputLayer object at 0x7f02000a0f70>).


WARNING:absl:Tensorflow version (2.12.0) found. Note that TFMA support for TF 2.0 is currently in beta
INFO:absl:Request was made to ignore the baseline ModelSpec and any change thresholds. This is likely because a baseline model was not provided: updated_config=
model_specs {
  signature_name: "serving_default"
  label_key: "tips"
  preprocessing_function_names: "transform_features"
}
slicing_specs {
}
slicing_specs {
  feature_keys: "trip_start_hour"
}
slicing_specs {
  feature_keys: "payment_type"
}
...省略

keras_eval_config

model_specs {
  signature_name: "serving_default"
  label_key: "tips"
  preprocessing_function_names: "transform_features"
}
slicing_specs {
}
slicing_specs {
  feature_keys: "trip_start_hour"
}
slicing_specs {
  feature_keys: "payment_type"
}
metrics_specs {
  metrics {
    class_name: "ExampleCount"
  }
  metrics {
    class_name: "Calibration"
  }
  metrics {
    class_name: "CalibrationPlot"
  }
  metrics {
    class_name: "ConfusionMatrixPlot"
  }
  metrics {
    class_name: "FairnessIndicators"
    config: "{ \"thresholds\": [0.1, 0.3, 0.5, 0.7, 0.9] }"
  }
  metrics {
    class_name: "AUC"
    threshold {
      value_threshold {
        lower_bound {
          value: 0.5
        }
      }
    }
  }
}
metrics_specs {
  metrics {
    class_name: "ExampleCount"
    config: "{\"name\": \"example_count\"}"
  }
  example_weights {
    unweighted: true
  }
}
metrics_specs {
  metrics {
    class_name: "WeightedExampleCount"
    config: "{\"name\": \"weighted_example_count\"}"
  }
  example_weights {
    weighted: true
  }
}
metrics_specs {
  metrics {
    class_name: "BinaryAccuracy"
    config: "{\"name\": \"binary_accuracy\"}"
  }
  metrics {
    class_name: "AUC"
    config: "{\"curve\": \"ROC\", \"name\": \"auc\", \"num_thresholds\": 10000, \"summation_method\": \"interpolation\"}"
  }
  metrics {
    class_name: "AUC"
    config: "{\"curve\": \"PR\", \"name\": \"auc_precison_recall\", \"num_thresholds\": 10000, \"summation_method\": \"interpolation\"}"
  }
  metrics {
    class_name: "Precision"
    config: "{\"name\": \"precision\"}"
  }
  metrics {
    class_name: "Recall"
    config: "{\"name\": \"recall\"}"
  }
  metrics {
    class_name: "MeanLabel"
    config: "{\"name\": \"mean_label\"}"
  }
  metrics {
    class_name: "MeanPrediction"
    config: "{\"name\": \"mean_prediction\"}"
  }
  metrics {
    class_name: "Calibration"
    config: "{\"name\": \"calibration\"}"
  }
  metrics {
    class_name: "ConfusionMatrixPlot"
    config: "{\"name\": \"confusion_matrix_plot\", \"num_thresholds\": 1000}"
  }
  metrics {
    class_name: "CalibrationPlot"
    config: "{\"left\": null, \"name\": \"calibration_plot\", \"num_buckets\": 1000, \"right\": null}"
  }
  metrics {
    class_name: "BinaryCrossentropy"
    config: "{\"dtype\": \"float32\", \"from_logits\": false, \"label_smoothing\": 0, \"name\": \"loss\"}"
  }
}
options {
  compute_confidence_intervals {
  }
}

keras_eval_result

EvalResult(slicing_metrics=[((), {'': {'': {'binary_accuracy': {'doubleValue': 0.9538061591787762}, 'loss': {'doubleValue': 0.6815148591995239}, 'example_count': {'doubleValue': 15002.0}, 'weighted_example_count': {'doubleValue': 15002.0}, 'calibration': {'doubleValue': -141.13269912566062}, 'fairness_indicators_metrics/false_positive_rate@0.1': {'doubleValue': 0.03674698795180723}, 'fairness_indicators_metrics/false_negative_rate@0.1': {'doubleValue': 0.07894736842105263}, 'fairness_indicators_metrics/true_positive_rate@0.1': {'doubleValue': 0.9210526315789473}, 'fairness_indicators_metrics/true_negative_rate@0.1': {'doubleValue': 0.9632530120481928}, 'fairness_indicators_metrics/positive_rate@0.1': {'doubleValue': 0.23610185308625517}, 'fairness_indicators_metrics/negative_rate@0.1': {'doubleValue': 0.7638981469137448}, 'fairness_indicators_metrics/false_discovery_rate@0.1': {'doubleValue': 0.12055335968379446}, 'fairness_indicators_metrics/false_omission_rate@0.1': {'doubleValue': 0.02329842931937173}, 'fairness_indicators_metrics/precision@0.1': {'doubleValue': 0.8794466403162056}, 'fairness_indicators_metrics/recall@0.1': {'doubleValue': 0.9210526315789473}, 'fairness_indicators_metrics/false_positive_rate@0.3': {'doubleValue': 0.03571428571428571}, 'fairness_indicators_metrics/false_negative_rate@0.3': {'doubleValue': 0.08042578356002365}, 'fairness_indicators_metrics/true_positive_rate@0.3': {'doubleValue': 0.9195742164399764}, 'fairness_indicators_metrics/true_negative_rate@0.3': {'doubleValue': 0.9642857142857143}, 'fairness_indicators_metrics/positive_rate@0.3': {'doubleValue': 0.23496867084388748}, 'fairness_indicators_metrics/negative_rate@0.3': ......省略

#获取metrics的所有的pandas
dfs=tfma.experimental.dataframe.metrics_as_dataframes(tfma.load_metrics(keras_output_path))
dfs.double_value

	slices			metric_keys					metric_values
	Overall	trip_start_hour	payment_type	name	model_name	output_name	is_diff	example_weighted	double_value
0		NaN	NaN	binary_accuracy			False	NaN	0.953806
1		NaN	NaN	loss			False	NaN	0.358257
2		NaN	NaN	loss			False	False	0.681515
3		NaN	NaN	example_count			False	False	15002.000000
4		NaN	NaN	weighted_example_count			False	True	15002.000000
...	...	...	...	...	...	...	...	...	...
2011	NaN	NaN	b'Pcard'	auc_precison_recall			False	False	0.000000
2012	NaN	NaN	b'Pcard'	precision			False	False	NaN
2013	NaN	NaN	b'Pcard'	recall			False	False	NaN
2014	NaN	NaN	b'Pcard'	mean_label			False	False	0.000000
2015	NaN	NaN	b'Pcard'	mean_prediction			False	False	-23.387853

2016 rows × 9 columns

#切片为行，评价为列
tfma.experimental.dataframe.auto_pivot(dfs.double_value)

(metric_keys, name)	binary_accuracy	fairness_indicators_metrics/precision@0.5	fairness_indicators_metrics/recall@0.5	fairness_indicators_metrics/false_positive_rate@0.7	fairness_indicators_metrics/false_negative_rate@0.7	fairness_indicators_metrics/true_positive_rate@0.7	fairness_indicators_metrics/true_negative_rate@0.7	fairness_indicators_metrics/positive_rate@0.7	fairness_indicators_metrics/negative_rate@0.7	fairness_indicators_metrics/false_discovery_rate@0.7	...	fairness_indicators_metrics/positive_rate@0.3	fairness_indicators_metrics/negative_rate@0.3	fairness_indicators_metrics/false_discovery_rate@0.3	fairness_indicators_metrics/false_omission_rate@0.3	fairness_indicators_metrics/precision@0.3	fairness_indicators_metrics/false_positive_rate@0.3	fairness_indicators_metrics/false_positive_rate@0.5	fairness_indicators_metrics/false_negative_rate@0.5	fairness_indicators_metrics/true_positive_rate@0.5	fairness_indicators_metrics/true_negative_rate@0.5
(metric_keys, example_weighted)	NaN	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
slices
Overall:	0.953806	0.882721	0.916913	0.035026	0.085157	0.914843	0.964974	0.233369	0.766631	0.116252	...	0.234969	0.765031	0.117730	0.023700	0.882270	0.035714	0.035456	0.083087	0.916913	0.964544
payment_type:b'Cash'	0.999697	1.000000	0.250000	0.000000	0.750000	0.250000	1.000000	0.000101	0.999899	0.000000	...	0.000101	0.999899	0.000000	0.000303	1.000000	0.000000	0.000000	0.750000	0.250000	1.000000
payment_type:b'Credit Card'	0.862513	0.883376	0.918470	0.252809	0.083605	0.916395	0.747191	0.702714	0.297286	0.115847	...	0.707337	0.292663	0.117079	0.182692	0.882921	0.257179	0.255306	0.081530	0.918470	0.744694
payment_type:b'Dispute'	1.000000	NaN	NaN	0.000000	NaN	NaN	1.000000	0.000000	1.000000	NaN	...	0.000000	1.000000	NaN	0.000000	NaN	0.000000	0.000000	NaN	NaN	1.000000
payment_type:b'No Charge'	0.987654	0.000000	NaN	0.000000	NaN	NaN	1.000000	0.000000	1.000000	NaN	...	0.012346	0.987654	1.000000	0.000000	0.000000	0.012346	0.012346	NaN	NaN	0.987654
payment_type:b'Pcard'	1.000000	NaN	NaN	0.000000	NaN	NaN	1.000000	0.000000	1.000000	NaN	...	0.000000	1.000000	NaN	0.000000	NaN	0.000000	0.000000	NaN	NaN	1.000000
payment_type:b'Prcard'	0.000000	0.000000	NaN	1.000000	NaN	NaN	0.000000	1.000000	0.000000	1.000000	...	1.000000	0.000000	1.000000	NaN	0.000000	1.000000	1.000000	NaN	NaN	0.000000
payment_type:b'Unknown'	0.862069	0.666667	0.400000	0.041667	0.600000	0.400000	0.958333	0.103448	0.896552	0.333333	...	0.103448	0.896552	0.333333	0.115385	0.666667	0.041667	0.041667	0.600000	0.400000	0.958333
trip_start_hour:0.0	0.968439	0.933333	0.926471	0.019313	0.073529	0.926471	0.980687	0.224252	0.775748	0.066667	...	0.224252	0.775748	0.066667	0.021413	0.933333	0.019313	0.019313	0.073529	0.926471	0.980687
trip_start_hour:1.0	0.946691	0.883212	0.902985	0.039024	0.104478	0.895522	0.960976	0.250000	0.750000	0.117647	...	0.251838	0.748162	0.116788	0.031941	0.883212	0.039024	0.039024	0.097015	0.902985	0.960976
trip_start_hour:10.0	0.970888	0.928994	0.951515	0.022989	0.048485	0.951515	0.977011	0.245997	0.754003	0.071006	...	0.247453	0.752547	0.076471	0.015474	0.923529	0.024904	0.022989	0.048485	0.951515	0.977011
trip_start_hour:11.0	0.951535	0.869281	0.930070	0.042017	0.069930	0.930070	0.957983	0.247173	0.752827	0.130719	...	0.250404	0.749596	0.129032	0.017241	0.870968	0.042017	0.042017	0.069930	0.930070	0.957983
trip_start_hour:12.0	0.958389	0.895954	0.922619	0.031196	0.077381	0.922619	0.968804	0.232215	0.767785	0.104046	...	0.232215	0.767785	0.104046	0.022727	0.895954	0.031196	0.031196	0.077381	0.922619	0.968804
trip_start_hour:13.0	0.948424	0.875000	0.897436	0.036900	0.102564	0.897436	0.963100	0.229226	0.770774	0.125000	...	0.229226	0.770774	0.125000	0.029740	0.875000	0.036900	0.036900	0.102564	0.897436	0.963100
trip_start_hour:14.0	0.942935	0.832432	0.933333	0.050788	0.066667	0.933333	0.949212	0.248641	0.751359	0.158470	...	0.251359	0.748641	0.167568	0.019964	0.832432	0.054291	0.054291	0.066667	0.933333	0.945709
trip_start_hour:15.0	0.957064	0.885135	0.903448	0.029463	0.096552	0.903448	0.970537	0.204986	0.795014	0.114865	...	0.206371	0.793629	0.120805	0.024433	0.879195	0.031196	0.029463	0.096552	0.903448	0.970537
trip_start_hour:16.0	0.937090	0.860104	0.887701	0.046875	0.117647	0.882353	0.953125	0.251638	0.748362	0.140625	...	0.252949	0.747051	0.139896	0.036842	0.860104	0.046875	0.046875	0.112299	0.887701	0.953125
trip_start_hour:17.0	0.948130	0.881356	0.876404	0.032258	0.123596	0.876404	0.967742	0.213510	0.786490	0.118644	...	0.213510	0.786490	0.118644	0.033742	0.881356	0.032258	0.032258	0.123596	0.876404	0.967742
trip_start_hour:18.0	0.955367	0.902542	0.918103	0.031030	0.081897	0.918103	0.968970	0.249734	0.750266	0.093617	...	0.250797	0.749203	0.097458	0.026950	0.902542	0.032440	0.032440	0.081897	0.918103	0.967560
trip_start_hour:19.0	0.960396	0.895349	0.946721	0.035248	0.053279	0.946721	0.964752	0.255446	0.744554	0.104651	...	0.256436	0.743564	0.104247	0.015979	0.895753	0.035248	0.035248	0.053279	0.946721	0.964752
trip_start_hour:2.0	0.959427	0.862745	0.967033	0.039634	0.032967	0.967033	0.960366	0.241050	0.758950	0.128713	...	0.243437	0.756563	0.137255	0.009464	0.862745	0.042683	0.042683	0.032967	0.967033	0.957317
trip_start_hour:20.0	0.963768	0.896996	0.950000	0.032172	0.050000	0.950000	0.967828	0.241201	0.758799	0.103004	...	0.242236	0.757764	0.102564	0.013661	0.897436	0.032172	0.032172	0.050000	0.950000	0.967828
trip_start_hour:21.0	0.960621	0.892857	0.935829	0.032258	0.064171	0.935829	0.967742	0.233890	0.766110	0.107143	...	0.235084	0.764916	0.106599	0.017161	0.893401	0.032258	0.032258	0.064171	0.935829	0.967742
trip_start_hour:22.0	0.954600	0.903061	0.903061	0.029641	0.096939	0.903061	0.970359	0.234170	0.765830	0.096939	...	0.236559	0.763441	0.095960	0.026604	0.904040	0.029641	0.029641	0.096939	0.903061	0.970359
trip_start_hour:23.0	0.950345	0.891892	0.868421	0.027923	0.151316	0.848684	0.972077	0.200000	0.800000	0.110345	...	0.204138	0.795862	0.108108	0.034662	0.891892	0.027923	0.027923	0.131579	0.868421	0.972077
trip_start_hour:3.0	0.941781	0.838235	0.904762	0.048035	0.095238	0.904762	0.951965	0.232877	0.767123	0.161765	...	0.239726	0.760274	0.171429	0.022523	0.828571	0.052402	0.048035	0.095238	0.904762	0.951965
trip_start_hour:4.0	0.959391	0.842105	0.941176	0.036810	0.058824	0.941176	0.963190	0.192893	0.807107	0.157895	...	0.192893	0.807107	0.157895	0.012579	0.842105	0.036810	0.036810	0.058824	0.941176	0.963190
trip_start_hour:5.0	0.930070	0.842105	0.888889	0.056075	0.111111	0.888889	0.943925	0.265734	0.734266	0.157895	...	0.265734	0.734266	0.157895	0.038095	0.842105	0.056075	0.056075	0.111111	0.888889	0.943925
trip_start_hour:6.0	0.948187	0.897436	0.853659	0.026316	0.146341	0.853659	0.973684	0.202073	0.797927	0.102564	...	0.202073	0.797927	0.102564	0.038961	0.897436	0.026316	0.026316	0.146341	0.853659	0.973684
trip_start_hour:7.0	0.956954	0.849057	0.900000	0.031746	0.100000	0.900000	0.968254	0.175497	0.824503	0.150943	...	0.175497	0.824503	0.150943	0.020080	0.849057	0.031746	0.031746	0.100000	0.900000	0.968254
trip_start_hour:8.0	0.949057	0.853659	0.921053	0.043269	0.078947	0.921053	0.956731	0.232075	0.767925	0.146341	...	0.232075	0.767925	0.146341	0.022113	0.853659	0.043269	0.043269	0.078947	0.921053	0.956731
trip_start_hour:9.0	0.942771	0.845161	0.903448	0.044316	0.110345	0.889655	0.955684	0.228916	0.771084	0.151316	...	0.234940	0.765060	0.153846	0.025591	0.846154	0.046243	0.046243	0.096552	0.903448	0.953757

32 rows × 63 columns

#fileter slices
df_double = dfs.double_value
df_filtered = df_double.loc[df_double.slices.trip_start_hour.isin([1,3,5,7])]
tfma.experimental.dataframe.auto_pivot(df_filtered)

(metric_keys, name)	binary_accuracy	fairness_indicators_metrics/precision@0.5	fairness_indicators_metrics/recall@0.5	fairness_indicators_metrics/false_positive_rate@0.7	fairness_indicators_metrics/false_negative_rate@0.7	fairness_indicators_metrics/true_positive_rate@0.7	fairness_indicators_metrics/true_negative_rate@0.7	fairness_indicators_metrics/positive_rate@0.7	fairness_indicators_metrics/negative_rate@0.7	fairness_indicators_metrics/false_discovery_rate@0.7	...	fairness_indicators_metrics/false_positive_rate@0.5	fairness_indicators_metrics/precision@0.3	fairness_indicators_metrics/recall@0.3	fairness_indicators_metrics/false_discovery_rate@0.3	fairness_indicators_metrics/negative_rate@0.3	fairness_indicators_metrics/positive_rate@0.3	fairness_indicators_metrics/true_negative_rate@0.3	fairness_indicators_metrics/true_positive_rate@0.3	fairness_indicators_metrics/false_negative_rate@0.3	fairness_indicators_metrics/false_omission_rate@0.3
(metric_keys, example_weighted)	NaN	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
slices
trip_start_hour:1.0	0.946691	0.883212	0.902985	0.039024	0.104478	0.895522	0.960976	0.250000	0.750000	0.117647	...	0.039024	0.883212	0.902985	0.116788	0.748162	0.251838	0.960976	0.902985	0.097015	0.031941
trip_start_hour:3.0	0.941781	0.838235	0.904762	0.048035	0.095238	0.904762	0.951965	0.232877	0.767123	0.161765	...	0.048035	0.828571	0.920635	0.171429	0.760274	0.239726	0.947598	0.920635	0.079365	0.022523
trip_start_hour:5.0	0.930070	0.842105	0.888889	0.056075	0.111111	0.888889	0.943925	0.265734	0.734266	0.157895	...	0.056075	0.842105	0.888889	0.157895	0.734266	0.265734	0.943925	0.888889	0.111111	0.038095
trip_start_hour:7.0	0.956954	0.849057	0.900000	0.031746	0.100000	0.900000	0.968254	0.175497	0.824503	0.150943	...	0.031746	0.849057	0.900000	0.150943	0.824503	0.175497	0.968254	0.900000	0.100000	0.020080

4 rows × 63 columns

#sort metric values
tfma.experimental.dataframe.auto_pivot(df_filtered).sort_values(by=('auc',False),ascending=True)

(metric_keys, name)	binary_accuracy	fairness_indicators_metrics/precision@0.5	fairness_indicators_metrics/recall@0.5	fairness_indicators_metrics/false_positive_rate@0.7	fairness_indicators_metrics/false_negative_rate@0.7	fairness_indicators_metrics/true_positive_rate@0.7	fairness_indicators_metrics/true_negative_rate@0.7	fairness_indicators_metrics/positive_rate@0.7	fairness_indicators_metrics/negative_rate@0.7	fairness_indicators_metrics/false_discovery_rate@0.7	...	fairness_indicators_metrics/false_positive_rate@0.5	fairness_indicators_metrics/precision@0.3	fairness_indicators_metrics/recall@0.3	fairness_indicators_metrics/false_discovery_rate@0.3	fairness_indicators_metrics/negative_rate@0.3	fairness_indicators_metrics/positive_rate@0.3	fairness_indicators_metrics/true_negative_rate@0.3	fairness_indicators_metrics/true_positive_rate@0.3	fairness_indicators_metrics/false_negative_rate@0.3	fairness_indicators_metrics/false_omission_rate@0.3
(metric_keys, example_weighted)	NaN	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
slices
trip_start_hour:5.0	0.930070	0.842105	0.888889	0.056075	0.111111	0.888889	0.943925	0.265734	0.734266	0.157895	...	0.056075	0.842105	0.888889	0.157895	0.734266	0.265734	0.943925	0.888889	0.111111	0.038095
trip_start_hour:1.0	0.946691	0.883212	0.902985	0.039024	0.104478	0.895522	0.960976	0.250000	0.750000	0.117647	...	0.039024	0.883212	0.902985	0.116788	0.748162	0.251838	0.960976	0.902985	0.097015	0.031941
trip_start_hour:7.0	0.956954	0.849057	0.900000	0.031746	0.100000	0.900000	0.968254	0.175497	0.824503	0.150943	...	0.031746	0.849057	0.900000	0.150943	0.824503	0.175497	0.968254	0.900000	0.100000	0.020080
trip_start_hour:3.0	0.941781	0.838235	0.904762	0.048035	0.095238	0.904762	0.951965	0.232877	0.767123	0.161765	...	0.048035	0.828571	0.920635	0.171429	0.760274	0.239726	0.947598	0.920635	0.079365	0.022523

4 rows × 63 columns

#上面的slicing_specs配置是需要在哪些切片上计算的配置
#如果要显示某切片上的metrics需要指定，否则这里显示overall
tfma.view.render_slicing_metrics(keras_eval_result,slicing_column='trip_start_hour')

在这里插入图片描述

SlicingMetricsViewer(config={'weightedExamplesColumn': 'example_count'}, data=[{'slice': 'trip_start_hour:19',…

#Rendering Plots
tfma.view.render_plot(keras_eval_result)

在这里插入图片描述

PlotViewer(config={'sliceName': 'Overall', 'metricKeys': {'calibrationPlot': {'metricName': 'calibrationHistog…

tfma.addons.fairness.view.widget_view.render_fairness_indicator(keras_eval_result)

在这里插入图片描述

FairnessIndicatorViewer(slicingMetrics=[{'sliceValue': 'Overall', 'slice': 'Overall', 'metrics': {'binary_accu…

起名大废废

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
TFX完整流水线示例以及补充（下）

TFX完整流水线示例以及补充（下），包含从Trainer到完以及补充测试。
复制链接

扫一扫

专栏目录