测试数据集下载地址:https://download.csdn.net/download/T_eddy/87952418
import tensorflow as tf
import tfx
from tfx. orchestration. experimental. interactive. interactive_context import InteractiveContext
2023-06-27 09:44:32.218831: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-27 09:44:33.095143: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-27 09:44:33.095239: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/TensorRT/lib:/usr/local/cuda-11.7/lib64
2023-06-27 09:44:33.095248: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
import tensorflow as tf
# Report which TensorFlow release is installed.
print(f'TensorFlow version: {tf.__version__}')
from tfx import v1 as tfx
# Report which TFX release is installed (`tfx` is the `tfx.v1` alias here).
print(f'TFX version: {tfx.__version__}')
TensorFlow version: 2.11.1
TFX version: 1.12.0
# Create the interactive TFX context with no arguments. As the warnings it
# logs state, it then uses a temporary directory as the pipeline root and a
# SQLite ML Metadata database inside that directory.
context = InteractiveContext()
WARNING:absl:InteractiveContext pipeline_root argument not provided: using temporary directory /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1 as root for pipeline outputs.
WARNING:absl:InteractiveContext metadata_connection_config not provided: using SQLite ML Metadata database at /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/metadata.sqlite.
CsvExampleGen输入组件(可忽略)
import os
from tfx. components import CsvExampleGen
from tfx. proto import example_gen_pb2
# The CSV files are expected under <current working directory>/data.
base_dir = os.getcwd()
data_dir = os.path.join(base_dir, 'data')

# One input split per sub-folder: 'train' -> 'train/*', 'eval' -> 'eval/*'.
input_config = example_gen_pb2.Input(
    splits=[
        example_gen_pb2.Input.Split(name=split_name, pattern=f'{split_name}/*')
        for split_name in ('train', 'eval')
    ]
)

# Build the CSV ingestion component and execute it in the interactive context.
example_gen = CsvExampleGen(input_base=data_dir, input_config=input_config)
context.run(example_gen)
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
WARNING:apache_beam.io.tfrecordio:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.
ExecutionResult
at 0x7f15225d22b0
.execution_id 1 .component
Artifact of type
'Examples' (uri: /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/CsvExampleGen/examples/1)
at 0x7f147a5d5910
.type <class 'tfx.types.standard_artifacts.Examples'> .uri /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/CsvExampleGen/examples/1 .span 0 .split_names ["train", "eval"] .version 0
TFDV操作
import tensorflow_data_validation as tfdv
statistics生成、可视化
# Compute descriptive statistics directly from the training CSV file.
stats = tfdv.generate_statistics_from_csv(
    data_location='./data/train/train_db_0.csv'
)
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow_data_validation/utils/statistics_io_impl.py:91: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.
Instructions for updating:
Use eager execution and:
`tf.data.TFRecordDataset(path)`
WARNING:tensorflow:From /home/xzy/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow_data_validation/utils/statistics_io_impl.py:91: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.
Instructions for updating:
Use eager execution and:
`tf.data.TFRecordDataset(path)`
# Render the statistics computed above for visual inspection.
tfdv.visualize_statistics(lhs_statistics=stats)
schema生成、修改、可视化
# Derive an initial schema (feature types, presence, string domains) from the
# training statistics.
schema = tfdv.infer_schema(statistics=stats)
# Bare expression: in a notebook cell this echoes the schema proto as text.
schema
feature {
name: "Type"
type: BYTES
domain: "Type"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Age"
type: INT
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Breed1"
type: BYTES
domain: "Breed1"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Gender"
type: BYTES
domain: "Gender"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Color1"
type: BYTES
domain: "Color1"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Color2"
type: BYTES
domain: "Color2"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "MaturitySize"
type: BYTES
domain: "MaturitySize"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "FurLength"
type: BYTES
domain: "FurLength"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Vaccinated"
type: BYTES
domain: "Vaccinated"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Sterilized"
type: BYTES
domain: "Sterilized"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Health"
type: BYTES
domain: "Health"
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Fee"
type: INT
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "Description"
value_count {
min: 1
max: 1
}
type: BYTES
presence {
min_count: 1
}
}
feature {
name: "PhotoAmt"
type: INT
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
feature {
name: "AdoptionSpeed"
type: INT
presence {
min_fraction: 1.0
min_count: 1
}
shape {
dim {
size: 1
}
}
}
string_domain {
name: "Type"
value: "Cat"
value: "Dog"
}
string_domain {
name: "Breed1"
value: "Abyssinian"
value: "American Curl"
value: "American Shorthair"
value: "American Wirehair"
value: "Australian Kelpie"
value: "Basenji"
value: "Basset Hound"
value: "Beagle"
value: "Bedlington Terrier"
value: "Belgian Shepherd Laekenois"
value: "Belgian Shepherd Malinois"
value: "Bengal"
value: "Black Labrador Retriever"
value: "Bobtail"
value: "Bombay"
value: "Border Collie"
value: "Boston Terrier"
value: "Boxer"
value: "British Shorthair"
value: "Bull Terrier"
value: "Bullmastiff"
value: "Burmese"
value: "Calico"
value: "Cattle Dog"
value: "Cavalier King Charles Spaniel"
value: "Chartreux"
value: "Chihuahua"
value: "Chow Chow"
value: "Cocker Spaniel"
value: "Collie"
value: "Coonhound"
value: "Corgi"
value: "Cymric"
value: "Dachshund"
value: "Dalmatian"
value: "Doberman Pinscher"
value: "Domestic Long Hair"
value: "Domestic Medium Hair"
value: "Domestic Short Hair"
value: "Egyptian Mau"
value: "English Bulldog"
value: "English Cocker Spaniel"
value: "Fox Terrier"
value: "French Bulldog"
value: "German Pinscher"
value: "German Shepherd Dog"
value: "German Spitz"
value: "Golden Retriever"
value: "Hound"
value: "Husky"
value: "Irish Setter"
value: "Jack Russell Terrier"
value: "Jack Russell Terrier (Parson Russell Terrier)"
value: "Japanese Bobtail"
value: "Javanese"
value: "Korat"
value: "Labrador Retriever"
value: "Lhasa Apso"
value: "Maine Coon"
value: "Maltese"
value: "Miniature Pinscher"
value: "Mixed Breed"
value: "Norwegian Forest Cat"
value: "Ocicat"
value: "Oriental Long Hair"
value: "Oriental Short Hair"
value: "Oriental Tabby"
value: "Pekingese"
value: "Persian"
value: "Pit Bull Terrier"
value: "Pomeranian"
value: "Poodle"
value: "Pug"
value: "Ragdoll"
value: "Rat Terrier"
value: "Rottweiler"
value: "Russian Blue"
value: "Samoyed"
value: "Schnauzer"
value: "Shar Pei"
value: "Shepherd"
value: "Shetland Sheepdog Sheltie"
value: "Shih Tzu"
value: "Siamese"
value: "Siberian"
value: "Siberian Husky"
value: "Silky Terrier"
value: "Singapura"
value: "Spitz"
value: "Tabby"
value: "Terrier"
value: "Tiger"
value: "Tortoiseshell"
value: "Toy Fox Terrier"
value: "Turkish Angora"
value: "Tuxedo"
value: "Wheaten Terrier"
value: "Whippet"
value: "Yellow Labrador Retriever"
}
string_domain {
name: "Gender"
value: "Female"
value: "Male"
}
string_domain {
name: "Color1"
value: "Black"
value: "Brown"
value: "Cream"
value: "Golden"
value: "Gray"
value: "White"
value: "Yellow"
}
string_domain {
name: "Color2"
value: "Brown"
value: "Cream"
value: "Golden"
value: "Gray"
value: "No Color"
value: "White"
value: "Yellow"
}
string_domain {
name: "MaturitySize"
value: "Large"
value: "Medium"
value: "Small"
}
string_domain {
name: "FurLength"
value: "Long"
value: "Medium"
value: "Short"
}
string_domain {
name: "Vaccinated"
value: "No"
value: "Not Sure"
value: "Yes"
}
string_domain {
name: "Sterilized"
value: "No"
value: "Not Sure"
value: "Yes"
}
string_domain {
name: "Health"
value: "Healthy"
value: "Minor Injury"
value: "Serious Injury"
}
# Show the inferred schema as feature and domain tables.
tfdv.display_schema(schema=schema)
Type Presence Valency Domain Feature name 'Type' STRING required 'Type' 'Age' INT required - 'Breed1' STRING required 'Breed1' 'Gender' STRING required 'Gender' 'Color1' STRING required 'Color1' 'Color2' STRING required 'Color2' 'MaturitySize' STRING required 'MaturitySize' 'FurLength' STRING required 'FurLength' 'Vaccinated' STRING required 'Vaccinated' 'Sterilized' STRING required 'Sterilized' 'Health' STRING required 'Health' 'Fee' INT required - 'Description' BYTES optional single - 'PhotoAmt' INT required - 'AdoptionSpeed' INT required -
Values Domain 'Type' 'Cat', 'Dog' 'Breed1' 'Abyssinian', 'American Curl', 'American Shorthair', 'American Wirehair', 'Australian Kelpie', 'Basenji', 'Basset Hound', 'Beagle', 'Bedlington Terrier', 'Belgian Shepherd Laekenois', 'Belgian Shepherd Malinois', 'Bengal', 'Black Labrador Retriever', 'Bobtail', 'Bombay', 'Border Collie', 'Boston Terrier', 'Boxer', 'British Shorthair', 'Bull Terrier', 'Bullmastiff', 'Burmese', 'Calico', 'Cattle Dog', 'Cavalier King Charles Spaniel', 'Chartreux', 'Chihuahua', 'Chow Chow', 'Cocker Spaniel', 'Collie', 'Coonhound', 'Corgi', 'Cymric', 'Dachshund', 'Dalmatian', 'Doberman Pinscher', 'Domestic Long Hair', 'Domestic Medium Hair', 'Domestic Short Hair', 'Egyptian Mau', 'English Bulldog', 'English Cocker Spaniel', 'Fox Terrier', 'French Bulldog', 'German Pinscher', 'German Shepherd Dog', 'German Spitz', 'Golden Retriever', 'Hound', 'Husky', 'Irish Setter', 'Jack Russell Terrier', 'Jack Russell Terrier (Parson Russell Terrier)', 'Japanese Bobtail', 'Javanese', 'Korat', 'Labrador Retriever', 'Lhasa Apso', 'Maine Coon', 'Maltese', 'Miniature Pinscher', 'Mixed Breed', 'Norwegian Forest Cat', 'Ocicat', 'Oriental Long Hair', 'Oriental Short Hair', 'Oriental Tabby', 'Pekingese', 'Persian', 'Pit Bull Terrier', 'Pomeranian', 'Poodle', 'Pug', 'Ragdoll', 'Rat Terrier', 'Rottweiler', 'Russian Blue', 'Samoyed', 'Schnauzer', 'Shar Pei', 'Shepherd', 'Shetland Sheepdog Sheltie', 'Shih Tzu', 'Siamese', 'Siberian', 'Siberian Husky', 'Silky Terrier', 'Singapura', 'Spitz', 'Tabby', 'Terrier', 'Tiger', 'Tortoiseshell', 'Toy Fox Terrier', 'Turkish Angora', 'Tuxedo', 'Wheaten Terrier', 'Whippet', 'Yellow Labrador Retriever' 'Gender' 'Female', 'Male' 'Color1' 'Black', 'Brown', 'Cream', 'Golden', 'Gray', 'White', 'Yellow' 'Color2' 'Brown', 'Cream', 'Golden', 'Gray', 'No Color', 'White', 'Yellow' 'MaturitySize' 'Large', 'Medium', 'Small' 'FurLength' 'Long', 'Medium', 'Short' 'Vaccinated' 'No', 'Not Sure', 'Yes' 'Sterilized' 'No', 'Not Sure', 'Yes' 'Health' 
'Healthy', 'Minor Injury', 'Serious Injury'
# Require 'Description' to be present in every example (min_fraction 1.0).
description_feature = tfdv.get_feature(schema, 'Description')
description_feature.presence.min_fraction = 1.0

# Look up the string domain currently attached to 'Color1'.
tfdv.get_domain(schema, 'Color1')
name: "Color1"
value: "Black"
value: "Brown"
value: "Cream"
value: "Golden"
value: "Gray"
value: "White"
value: "Yellow"
from tensorflow_metadata. proto. v0 import schema_pb2
# Replace the 'Color1' domain with a three-value one. It omits Golden, Gray,
# White and Yellow, which the later validation runs report as unexpected values.
sd_test = schema_pb2.StringDomain(value=['Black', 'Brown', 'Cream'])
tfdv.set_domain(schema, 'Color1', sd_test)

# Integer range 0..120, attached to the 'Age' feature further down.
intdomain = schema_pb2.IntDomain(min=0, max=120)
WARNING:root:Replacing existing domain of feature "Color1".
# Constrain 'Age' to the integer range defined by `intdomain`.
tfdv.set_domain(schema, 'Age', intdomain)

# Re-display the schema to confirm the edited domains.
tfdv.display_schema(schema=schema)
Type Presence Valency Domain Feature name 'Type' STRING required 'Type' 'Age' INT required min: 0; max: 120 'Breed1' STRING required 'Breed1' 'Gender' STRING required 'Gender' 'Color1' STRING required 'Color1_domain' 'Color2' STRING required 'Color2' 'MaturitySize' STRING required 'MaturitySize' 'FurLength' STRING required 'FurLength' 'Vaccinated' STRING required 'Vaccinated' 'Sterilized' STRING required 'Sterilized' 'Health' STRING required 'Health' 'Fee' INT required - 'Description' BYTES required single - 'PhotoAmt' INT required - 'AdoptionSpeed' INT required -
Values Domain 'Type' 'Cat', 'Dog' 'Breed1' 'Abyssinian', 'American Curl', 'American Shorthair', 'American Wirehair', 'Australian Kelpie', 'Basenji', 'Basset Hound', 'Beagle', 'Bedlington Terrier', 'Belgian Shepherd Laekenois', 'Belgian Shepherd Malinois', 'Bengal', 'Black Labrador Retriever', 'Bobtail', 'Bombay', 'Border Collie', 'Boston Terrier', 'Boxer', 'British Shorthair', 'Bull Terrier', 'Bullmastiff', 'Burmese', 'Calico', 'Cattle Dog', 'Cavalier King Charles Spaniel', 'Chartreux', 'Chihuahua', 'Chow Chow', 'Cocker Spaniel', 'Collie', 'Coonhound', 'Corgi', 'Cymric', 'Dachshund', 'Dalmatian', 'Doberman Pinscher', 'Domestic Long Hair', 'Domestic Medium Hair', 'Domestic Short Hair', 'Egyptian Mau', 'English Bulldog', 'English Cocker Spaniel', 'Fox Terrier', 'French Bulldog', 'German Pinscher', 'German Shepherd Dog', 'German Spitz', 'Golden Retriever', 'Hound', 'Husky', 'Irish Setter', 'Jack Russell Terrier', 'Jack Russell Terrier (Parson Russell Terrier)', 'Japanese Bobtail', 'Javanese', 'Korat', 'Labrador Retriever', 'Lhasa Apso', 'Maine Coon', 'Maltese', 'Miniature Pinscher', 'Mixed Breed', 'Norwegian Forest Cat', 'Ocicat', 'Oriental Long Hair', 'Oriental Short Hair', 'Oriental Tabby', 'Pekingese', 'Persian', 'Pit Bull Terrier', 'Pomeranian', 'Poodle', 'Pug', 'Ragdoll', 'Rat Terrier', 'Rottweiler', 'Russian Blue', 'Samoyed', 'Schnauzer', 'Shar Pei', 'Shepherd', 'Shetland Sheepdog Sheltie', 'Shih Tzu', 'Siamese', 'Siberian', 'Siberian Husky', 'Silky Terrier', 'Singapura', 'Spitz', 'Tabby', 'Terrier', 'Tiger', 'Tortoiseshell', 'Toy Fox Terrier', 'Turkish Angora', 'Tuxedo', 'Wheaten Terrier', 'Whippet', 'Yellow Labrador Retriever' 'Gender' 'Female', 'Male' 'Color1' 'Black', 'Brown', 'Cream', 'Golden', 'Gray', 'White', 'Yellow' 'Color2' 'Brown', 'Cream', 'Golden', 'Gray', 'No Color', 'White', 'Yellow' 'MaturitySize' 'Large', 'Medium', 'Small' 'FurLength' 'Long', 'Medium', 'Short' 'Vaccinated' 'No', 'Not Sure', 'Yes' 'Sterilized' 'No', 'Not Sure', 'Yes' 'Health' 
'Healthy', 'Minor Injury', 'Serious Injury' 'Color1_domain' 'Black', 'Brown', 'Cream'
识别数据中的问题与异常(anomalies)
# Statistics for the training CSV and for the evaluation CSV.
train_stats = tfdv.generate_statistics_from_csv(
    data_location='data/train/train_db_0.csv'
)
val_stats = tfdv.generate_statistics_from_csv(
    data_location='data/eval/valid_db_0.csv'
)

# Side-by-side view: evaluation data on the left, training data on the right.
tfdv.visualize_statistics(
    lhs_statistics=val_stats,
    rhs_statistics=train_stats,
    lhs_name='VAL_DATASET',
    rhs_name='TRAIN_DATASET',
)

# Check the evaluation statistics against the (edited) schema and show what
# does not conform.
anomalies = tfdv.validate_statistics(statistics=val_stats, schema=schema)
tfdv.display_anomalies(anomalies=anomalies)
Anomaly short description Anomaly long description Feature name 'Age' Out-of-range values Unexpectedly large value: 255. 'Breed1' Unexpected string values Examples contain values missing from the schema: Black Mouth Cur (<1%), Burmilla (<1%), Greyhound (<1%), Lowchen (<1%), Munsterlander (<1%), Papillon (<1%), Standard Poodle (<1%), White German Shepherd (<1%), Yorkshire Terrier Yorkie (<1%). 'Color1' Unexpected string values Examples contain values missing from the schema: Golden (~7%), Gray (~4%), White (~5%), Yellow (~4%).
# Accept six of the breeds that validation flagged as missing from 'Breed1'.
# The domain is looked up once and the values are added in the original order.
breed1_domain = tfdv.get_domain(schema, 'Breed1')
breed1_domain.value.extend([
    'Burmilla',
    'Black Mouth Cur',
    'Greyhound',
    'Lowchen',
    'Munsterlander',
    'Papillon',
])

# Validate again; breeds not added above are still reported.
anomalies = tfdv.validate_statistics(statistics=val_stats, schema=schema)
tfdv.display_anomalies(anomalies=anomalies)
Anomaly short description Anomaly long description Feature name 'Breed1' Unexpected string values Examples contain values missing from the schema: Standard Poodle (<1%), White German Shepherd (<1%), Yorkshire Terrier Yorkie (<1%). 'Age' Out-of-range values Unexpectedly large value: 255. 'Color1' Unexpected string values Examples contain values missing from the schema: Golden (~7%), Gray (~4%), White (~5%), Yellow (~4%).
# Declare two environments on the schema and mark 'Type' as absent from SERVING.
schema.default_environment.extend(['TRAINING', 'SERVING'])
tfdv.get_feature(schema, 'Type').not_in_environment.append('SERVING')

# Skew comparator threshold (L-infinity norm) for 'Age'.
# NOTE(review): 'Age' is an INT feature; the TFDV docs describe infinity_norm
# for categorical features and jensen_shannon_divergence for numeric ones.
# Confirm which comparator was intended.
age_feature = tfdv.get_feature(schema, 'Age')
age_feature.skew_comparator.infinity_norm.threshold = 0.001

# Compare training statistics against the evaluation statistics, used here as
# stand-in serving data.
# NOTE(review): no `environment=` argument is passed, so presumably the
# environment settings above do not influence this call. Verify.
skew_anomalies = tfdv.validate_statistics(
    statistics=train_stats,
    serving_statistics=val_stats,
    schema=schema,
)
tfdv.display_anomalies(skew_anomalies)
Anomaly short description Anomaly long description Feature name 'Color1' Unexpected string values Examples contain values missing from the schema: Golden (~6%), Gray (~4%), White (~5%), Yellow (~4%). 'Description' Column dropped The feature was present in fewer examples than expected: minimum fraction = 1.000000, actual = 0.998375 'Age' Out-of-range values Unexpectedly large value: 180.
# Drift comparator threshold (L-infinity norm) for 'Age'.
# NOTE(review): 'Age' is an INT feature; the TFDV docs describe infinity_norm
# for categorical features. Confirm which comparator was intended.
age_feature = tfdv.get_feature(schema, 'Age')
age_feature.drift_comparator.infinity_norm.threshold = 0.001

# Validate the training statistics, with the evaluation statistics supplied as
# the "previous" statistics for drift comparison.
drift_anomalies = tfdv.validate_statistics(
    statistics=train_stats,
    schema=schema,
    previous_statistics=val_stats,
)
tfdv.display_anomalies(drift_anomalies)
Anomaly short description Anomaly long description Feature name 'Age' Out-of-range values Unexpectedly large value: 180. 'Color1' Unexpected string values Examples contain values missing from the schema: Golden (~6%), Gray (~4%), White (~5%), Yellow (~4%). 'Description' Column dropped The feature was present in fewer examples than expected: minimum fraction = 1.000000, actual = 0.998375
Schema保存、加载
# Write the edited schema to a text-format protobuf file, read it back, and
# display the reloaded copy.
schema_path = './schema.pbtxt'
tfdv.write_schema_text(schema, schema_path)
reload_schema = tfdv.load_schema_text(schema_path)
tfdv.display_schema(schema=reload_schema)
Type Presence Valency Domain Feature name 'Type' STRING required 'Type' 'Age' INT required min: 0; max: 120 'Breed1' STRING required 'Breed1' 'Gender' STRING required 'Gender' 'Color1' STRING required 'Color1_domain' 'Color2' STRING required 'Color2' 'MaturitySize' STRING required 'MaturitySize' 'FurLength' STRING required 'FurLength' 'Vaccinated' STRING required 'Vaccinated' 'Sterilized' STRING required 'Sterilized' 'Health' STRING required 'Health' 'Fee' INT required - 'Description' BYTES required single - 'PhotoAmt' INT required - 'AdoptionSpeed' INT required -
Values Domain 'Type' 'Cat', 'Dog' 'Breed1' 'Abyssinian', 'American Curl', 'American Shorthair', 'American Wirehair', 'Australian Kelpie', 'Basenji', 'Basset Hound', 'Beagle', 'Bedlington Terrier', 'Belgian Shepherd Laekenois', 'Belgian Shepherd Malinois', 'Bengal', 'Black Labrador Retriever', 'Bobtail', 'Bombay', 'Border Collie', 'Boston Terrier', 'Boxer', 'British Shorthair', 'Bull Terrier', 'Bullmastiff', 'Burmese', 'Calico', 'Cattle Dog', 'Cavalier King Charles Spaniel', 'Chartreux', 'Chihuahua', 'Chow Chow', 'Cocker Spaniel', 'Collie', 'Coonhound', 'Corgi', 'Cymric', 'Dachshund', 'Dalmatian', 'Doberman Pinscher', 'Domestic Long Hair', 'Domestic Medium Hair', 'Domestic Short Hair', 'Egyptian Mau', 'English Bulldog', 'English Cocker Spaniel', 'Fox Terrier', 'French Bulldog', 'German Pinscher', 'German Shepherd Dog', 'German Spitz', 'Golden Retriever', 'Hound', 'Husky', 'Irish Setter', 'Jack Russell Terrier', 'Jack Russell Terrier (Parson Russell Terrier)', 'Japanese Bobtail', 'Javanese', 'Korat', 'Labrador Retriever', 'Lhasa Apso', 'Maine Coon', 'Maltese', 'Miniature Pinscher', 'Mixed Breed', 'Norwegian Forest Cat', 'Ocicat', 'Oriental Long Hair', 'Oriental Short Hair', 'Oriental Tabby', 'Pekingese', 'Persian', 'Pit Bull Terrier', 'Pomeranian', 'Poodle', 'Pug', 'Ragdoll', 'Rat Terrier', 'Rottweiler', 'Russian Blue', 'Samoyed', 'Schnauzer', 'Shar Pei', 'Shepherd', 'Shetland Sheepdog Sheltie', 'Shih Tzu', 'Siamese', 'Siberian', 'Siberian Husky', 'Silky Terrier', 'Singapura', 'Spitz', 'Tabby', 'Terrier', 'Tiger', 'Tortoiseshell', 'Toy Fox Terrier', 'Turkish Angora', 'Tuxedo', 'Wheaten Terrier', 'Whippet', 'Yellow Labrador Retriever', 'Burmilla', 'Black Mouth Cur', 'Greyhound', 'Lowchen', 'Munsterlander', 'Papillon' 'Gender' 'Female', 'Male' 'Color1' 'Black', 'Brown', 'Cream', 'Golden', 'Gray', 'White', 'Yellow' 'Color2' 'Brown', 'Cream', 'Golden', 'Gray', 'No Color', 'White', 'Yellow' 'MaturitySize' 'Large', 'Medium', 'Small' 'FurLength' 'Long', 'Medium', 'Short' 
'Vaccinated' 'No', 'Not Sure', 'Yes' 'Sterilized' 'No', 'Not Sure', 'Yes' 'Health' 'Healthy', 'Minor Injury', 'Serious Injury' 'Color1_domain' 'Black', 'Brown', 'Cream'
在TFX流水线中使用集成了TFDV的组件(StatisticsGen、SchemaGen、ExampleValidator)
from tfx. components import StatisticsGen
from tfx. components import SchemaGen
from tfx. components import ExampleValidator
# Compute statistics over the examples produced by the CsvExampleGen component.
examples_channel = example_gen.outputs['examples']
statistics_gen = StatisticsGen(examples=examples_channel)
context.run(statistics_gen)
ExecutionResult
at 0x7f14d611afd0
.execution_id 2 .component
Artifact of type
'ExampleStatistics' (uri: /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/StatisticsGen/statistics/2)
at 0x7f14d676bb20
.type <class 'tfx.types.standard_artifacts.ExampleStatistics'> .uri /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/StatisticsGen/statistics/2 .span 0 .split_names ["train", "eval"]
# Generate a schema from the StatisticsGen output, with feature-shape
# inference enabled.
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'],
    infer_feature_shape=True,
)
context.run(schema_gen)
ExecutionResult
at 0x7f14d60fe4f0
.execution_id 3 .component
Artifact of type
'Schema' (uri: /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/SchemaGen/schema/3)
at 0x7f14d60fe070
.type <class 'tfx.types.standard_artifacts.Schema'> .uri /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/SchemaGen/schema/3
# Display the schema artifact produced by SchemaGen.
generated_schema = schema_gen.outputs['schema']
context.show(generated_schema)
Artifact at /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/SchemaGen/schema/3
Type Presence Valency Domain Feature name 'AdoptionSpeed' INT required - 'Age' INT required - 'Breed1' BYTES required - 'Color1' STRING required 'Color1' 'Color2' STRING required 'Color2' 'Description' BYTES required - 'Fee' INT required - 'FurLength' STRING required 'FurLength' 'Gender' STRING required 'Gender' 'Health' STRING required 'Health' 'MaturitySize' STRING required 'MaturitySize' 'PhotoAmt' INT required - 'Sterilized' STRING required 'Sterilized' 'Type' STRING required 'Type' 'Vaccinated' STRING required 'Vaccinated'
Values Domain 'Color1' 'Black', 'Brown', 'Cream', 'Golden', 'Gray', 'White', 'Yellow' 'Color2' 'Brown', 'Cream', 'Golden', 'Gray', 'No Color', 'White', 'Yellow' 'FurLength' 'Long', 'Medium', 'Short' 'Gender' 'Female', 'Male' 'Health' 'Healthy', 'Minor Injury', 'Serious Injury' 'MaturitySize' 'Large', 'Medium', 'Small' 'Sterilized' 'No', 'Not Sure', 'Yes' 'Type' 'Cat', 'Dog' 'Vaccinated' 'No', 'Not Sure', 'Yes'
# Validate the StatisticsGen statistics against the SchemaGen schema.
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'],
)
context.run(example_validator)
ExecutionResult
at 0x7f14d6899cd0
.execution_id 4 .component
Artifact of type
'ExampleAnomalies' (uri: /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/ExampleValidator/anomalies/4)
at 0x7f14d67f5ac0
.type <class 'tfx.types.standard_artifacts.ExampleAnomalies'> .uri /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/ExampleValidator/anomalies/4 .span 0 .split_names ["train", "eval"]
# Display the anomalies artifact produced by ExampleValidator.
validation_anomalies = example_validator.outputs['anomalies']
context.show(validation_anomalies)
Artifact at /tmp/tfx-interactive-2023-06-27T09_44_36.151286-r64f8mk1/ExampleValidator/anomalies/4
'train' split:
No anomalies found.
'eval' split:
No anomalies found.