import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_transform.beam as tft_beam
import tensorflow_text as tf_text
from tfx import v1 as tfx
from tensorflow_transform.tf_metadata import schema_utils
import tempfile
from custom.bertpreprocess import BertTokenizerModule
2023-07-13 20:01:41.287848: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-13 20:01:44.243019: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
WARNING:absl:Failed to import tensorflow serving protos. It can fail if the TF version doesn't match with the TF Serving version. We will try importing again with a workaround:module 'tensorflow.core.protobuf.error_codes_pb2' has no attribute '_CODE'
# Small in-memory sample of sentence pairs. In every record 'context' holds
# the UTF-8 encoded Spanish sentence and 'target' its English counterpart.
raw_data = [
    {
        'context': b'Una vez hubo aqu\xc3\xad una iglesia.',
        'target': b'There was a church here once.',
    },
    {
        'context': b'\xc2\xbfCu\xc3\xa1l es tu nombre completo?',
        'target': b"What's your full name?",
    },
    {
        'context': b'No tendr\xc3\xa1s ning\xc3\xban problema m\xc3\xa1s.',
        'target': b"You'll have no more problems.",
    },
    {
        'context': b'Tom le mostr\xc3\xb3 a Mary la foto de John.',
        'target': b"Tom showed Mary John's picture.",
    },
    {
        'context': b'Pareces un polic\xc3\xada.',
        'target': b'You look like a policeman.',
    },
]

# Schema for the records above: both features are scalar (shape=[]) strings.
raw_data_metadata = tft.DatasetMetadata(
    schema_utils.schema_from_feature_spec({
        feature_name: tf.io.FixedLenFeature(shape=[], dtype=tf.string)
        for feature_name in ('context', 'target')
    })
)
import pandas as pd
# Tabulate the sample records for inspection: one row per record in
# raw_data, with a 'context' and a 'target' column.
df = pd.DataFrame(raw_data)
# Bare expression so the notebook displays the frame as the cell output.
df
context
target
0
b'Una vez hubo aqu\xc3\xad una iglesia.'
b'There was a church here once.'
1
b'\xc2\xbfCu\xc3\xa1l es tu nombre completo?'
b"What's your full name?"
2
b'No tendr\xc3\xa1s ning\xc3\xban problema m\x...
b"You'll have no more problems."
3
b'Tom le mostr\xc3\xb3 a Mary la foto de John.'
b"Tom showed Mary John's picture."
4
b'Pareces un polic\xc3\xada.'
b'You look like a policeman.'
# View the frame as a plain dict that maps each column name to its Series.
dict(df)
{'context': 0 b'Una vez hubo aqu\xc3\xad una iglesia.'
1 b'\xc2\xbfCu\xc3\xa1l es tu nombre completo?'
2 b'No tendr\xc3\xa1s ning\xc3\xban problema m\x...
3 b'Tom le mostr\xc3\xb3 a Mary la foto de John.'
4 b'Pareces un polic\xc3\xada.'
Name: context, dtype: object,
'target': 0 b'There was a church here once.'
1 b"What's your full name?"
2 b"You'll have no more problems."
3 b"Tom showed Mary John's picture."
4 b'You look like a policeman.'
Name: target, dtype: object}
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
WARNING:absl:You are passing instance dicts and DatasetMetadata to TFT which will not provide optimal performance. Consider following the TFT guide to upgrade to the TFXIO format (Apache Arrow RecordBatch).
2023-07-12 16:05:27.948661: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-12 16:05:28.017529: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
WARNING:absl:You are passing instance dicts and DatasetMetadata to TFT which will not provide optimal performance. Consider following the TFT guide to upgrade to the TFXIO format (Apache Arrow RecordBatch).
WARNING:absl:You are outputting instance dicts from `TransformDataset` which will not provide optimal performance. Consider setting `output_record_batches=True` to upgrade to the TFXIO format (Apache Arrow RecordBatch). Encoding functionality in this module works with both formats.
WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['/home/xzy/anaconda3/envs/tfx/lib/python3.9/site-packages/ipykernel_launcher.py', '-f', '/home/xzy/.local/share/jupyter/runtime/kernel-556c4c67-5714-4385-b1d9-794589411043.json']
INFO:tensorflow:Assets written to: /tmp/tmpd1xlh5dp/tftransform_tmp/81b2cef6d77041b5a3d7d63b0399f8aa/assets
INFO:tensorflow:Assets written to: /tmp/tmpd1xlh5dp/tftransform_tmp/81b2cef6d77041b5a3d7d63b0399f8aa/assets
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['/home/xzy/anaconda3/envs/tfx/lib/python3.9/site-packages/ipykernel_launcher.py', '-f', '/home/xzy/.local/share/jupyter/runtime/kernel-556c4c67-5714-4385-b1d9-794589411043.json']
# Wrap the tf.Transform output found under output_dir (defined outside this
# snippet) so the fitted preprocessing can be reused.
tf_output = tft.TFTransformOutput(output_dir)
# Callable layer that applies the saved transform to a dict of raw features.
infer=tf_output.transform_features_layer()
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:struct2tensor is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
INFO:tensorflow:tensorflow_decision_forests is not available.
# Smoke-test the transform layer on a batch of two raw examples per feature:
# an empty string and one real sentence.
infer({'context':tf.constant(['',b'Una vez hubo aqu\xc3\xad una iglesia.']),'target':tf.constant(['',b'There was a church here once.'])})
import logging
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow_text
from custom.bertpreprocess import BertPreprocess
2023-07-14 21:08:26.017291: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-14 21:08:26.679469: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
def load_data(path: pathlib.Path) -> tuple[np.ndarray, np.ndarray]:
    """Read tab-separated sentence pairs from a UTF-8 text file.

    Every line of the file must consist of exactly two tab-separated fields.
    The first field of each line is collected into ``en`` and the second
    into ``spa``.

    Args:
        path: Location of the text file to read.

    Returns:
        A tuple ``(en, spa)`` of NumPy arrays holding, in file order, the
        first and the second field of every line respectively.

    Raises:
        ValueError: If any line does not split into exactly two fields.
    """
    text = path.read_text(encoding='utf-8')
    lines = text.splitlines()
    pairs = [line.split('\t') for line in lines]

    en = np.array([en for en, spa in pairs])
    spa = np.array([spa for en, spa in pairs])
    return en, spa
2023-07-14 21:08:32.775343: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-14 21:08:32.799577: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
WARNING:tensorflow:AutoGraph could not transform <function <lambda> at 0x7ff4a9802ee0> and will run it as-is.
Cause: could not parse the source code of <function <lambda> at 0x7ff4a9802ee0>: no matching AST found among candidates:
To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
WARNING: AutoGraph could not transform <function <lambda> at 0x7ff4a9802ee0> and will run it as-is.
Cause: could not parse the source code of <function <lambda> at 0x7ff4a9802ee0>: no matching AST found among candidates:
To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
2023-07-14 21:08:35.375294: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [95271]
[[{{node Placeholder/_1}}]]
2023-07-14 21:08:35.375514: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [95271]
[[{{node Placeholder/_1}}]]
WARNING:tensorflow:AutoGraph could not transform <function <lambda> at 0x7ff576e47310> and will run it as-is.
Cause: could not parse the source code of <function <lambda> at 0x7ff576e47310>: no matching AST found among candidates:
To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
WARNING: AutoGraph could not transform <function <lambda> at 0x7ff576e47310> and will run it as-is.
Cause: could not parse the source code of <function <lambda> at 0x7ff576e47310>: no matching AST found among candidates:
To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
2023-07-14 21:09:10.383556: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [95271]
[[{{node Placeholder/_1}}]]
2023-07-14 21:09:10.383806: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [95271]
[[{{node Placeholder/_1}}]]
CPU times: user 54.6 s, sys: 684 ms, total: 55.3 s
Wall time: 53.1 s
2023-07-13 20:02:00.316739: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-13 20:02:00.506817: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
def write_to_tfrecord(spa_line, en_line, f):
    """Serialize one sentence pair as a ``tf.train.Example`` and write it.

    The pair is stored as two bytes features: ``'context'`` (taken from
    ``spa_line``) and ``'target'`` (taken from ``en_line``).

    Args:
        spa_line: Object exposing ``.numpy()`` whose result is stored under
            ``'context'`` (presumably a scalar string tensor -- confirm
            against the caller).
        en_line: Object exposing ``.numpy()`` whose result is stored under
            ``'target'``.
        f: Writer exposing ``write(bytes)``; receives the serialized example.
    """
    # Debug output kept from the original cell: show the raw tensor and its
    # decoded value.
    print(spa_line)
    spa = spa_line.numpy()
    print(spa)
    en = en_line.numpy()

    example = tf.train.Example(
        features=tf.train.Features(feature={
            'context': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[spa])),
            'target': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[en])),
        }))
    f.write(example.SerializeToString())
### Writes the dataset to disk; please keep this block commented out.
# with tf.io.TFRecordWriter('./data/data.tfrecord') as f:
#     for spa_line, en_line in raw_dataset:
#         spa = spa_line.numpy()
#         en = en_line.numpy()
#         example = tf.train.Example(features=tf.train.Features(feature={
#             'context': tf.train.Feature(bytes_list=tf.train.BytesList(value=[spa])),
#             'target': tf.train.Feature(bytes_list=tf.train.BytesList(value=[en])),
#         }))
#         f.write(example.SerializeToString())
2023-07-13 20:02:00.714819: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [118964]
[[{{node Placeholder/_1}}]]
# Preview the first five serialized records: parse each one back into its
# scalar string 'context' / 'target' features and print the result.
for ex in rel.take(5):
    a = tf.io.parse_single_example(ex, {
        'context': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
        'target': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    })
    print(a)
{'context': <tf.Tensor: shape=(), dtype=string, numpy=b'Ve.'>, 'target': <tf.Tensor: shape=(), dtype=string, numpy=b'Go.'>}
{'context': <tf.Tensor: shape=(), dtype=string, numpy=b'Vete.'>, 'target': <tf.Tensor: shape=(), dtype=string, numpy=b'Go.'>}
{'context': <tf.Tensor: shape=(), dtype=string, numpy=b'Vaya.'>, 'target': <tf.Tensor: shape=(), dtype=string, numpy=b'Go.'>}
{'context': <tf.Tensor: shape=(), dtype=string, numpy=b'V\xc3\xa1yase.'>, 'target': <tf.Tensor: shape=(), dtype=string, numpy=b'Go.'>}
{'context': <tf.Tensor: shape=(), dtype=string, numpy=b'Hola.'>, 'target': <tf.Tensor: shape=(), dtype=string, numpy=b'Hi.'>}
2023-07-13 20:02:34.959953: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1]
[[{{node Placeholder/_0}}]]
模型测试(两种导入)
import tensorflow as tf
from typing import List
import tensorflow as tf
import tensorflow_transform as tft
from tensorflow import keras
from tfx import v1 as tfx
from tfx_bsl.public import tfxio
from tensorflow_metadata.proto.v0 import schema_pb2
from models import constants
from custom.TransformerModel import Transformer,TranslatorForTFX,CustomSchedule,masked_accuracy,masked_loss
WARNING:absl:Failed to import tensorflow serving protos. It can fail if the TF version doesn't match with the TF Serving version. We will try importing again with a workaround:module 'tensorflow.core.protobuf.error_codes_pb2' has no attribute '_CODE'
第一种导入
# First loading approach: restore the exported SavedModel with
# tf.saved_model.load from the Trainer's Format-Serving directory.
model = tf.saved_model.load('./tfx_pipeline_output/nmt3/Trainer/model/6/Format-Serving/')
2023-07-14 20:42:25.503655: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-14 20:42:25.596098: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
# All signatures exposed by the loaded model.
model.signatures
_SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(*, examples) at 0x7EFD522E07F0>, 'transform_features': <ConcreteFunction signature_wrapper(*, examples) at 0x7EFD522C7D90>, 'translator': <ConcreteFunction signature_wrapper(*, examples) at 0x7EFD5236F6A0>, 'train_step': <ConcreteFunction signature_wrapper(*, context_tensor, target_tensor) at 0x7EFD521E2670>})
# Test data: one sentence pair, packed the same way as the training records.
spa = b'Una vez hubo aqu\xc3\xad una iglesia.'
en = b'There was a church here once.'

# Serialized tf.train.Example with two bytes features, 'context' and 'target'.
example = tf.train.Example(
    features=tf.train.Features(
        feature={
            'context': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[spa])),
            'target': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[en])),
        }
    )
).SerializeToString()

# Batch of one serialized example; the bare expression displays the tensor.
inputs = tf.constant([example])
inputs
<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'\n]\n.\n\x07context\x12#\n!\n\x1fUna vez hubo aqu\xc3\xad una iglesia.\n+\n\x06target\x12!\n\x1f\n\x1dThere was a church here once.'],
dtype=object)>
# Long source sentence used to exercise the 'translator' signature.
spa = (
    b'Si quieres sonar como un hablante nativo, debes estar dispuesto a '
    b'practicar diciendo la misma frase una y otra vez de la misma manera '
    b'en que un m\xc3\xbasico de banjo practica el mismo fraseo una y otra '
    b'vez hasta que lo puedan tocar correctamente y en el tiempo esperado.'
)

# Only 'context' is supplied here; the example carries no 'target' feature.
example = tf.train.Example(
    features=tf.train.Features(
        feature={
            'context': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[spa])),
        }
    )
).SerializeToString()

inputs = tf.constant([example])
# Bare expression so the notebook displays the signature's output dict.
model.signatures['translator'](inputs)
{'outputs': <tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'[UNK] you you may to you you you you may follow walked ?ter a itate you touch signrop be start meeting you you fool you even shouldes you you you you you mustap its to you you you you other ,ur the want to you you you you you you you you you you you you still you plenty private you you you make . you let willing pay tight you you not you you you you is re . tend successful you you you just you you you just you you you you you you you belt would way wfe youent want button isn once you you call be . youop you don you don he rich answer'],
dtype=object)>}
<tf.Tensor: shape=(118964,), dtype=string, numpy=
array([b'Ve.', b'Vete.', b'Vaya.', ...,
b'Una huella de carbono es la cantidad de contaminaci\xc3\xb3n de di\xc3\xb3xido de carbono que producimos como producto de nuestras actividades. Algunas personas intentan reducir su huella de carbono porque est\xc3\xa1n preocupados acerca del cambio clim\xc3\xa1tico.',
b'Como suele haber varias p\xc3\xa1ginas web sobre cualquier tema, normalmente s\xc3\xb3lo le doy al bot\xc3\xb3n de retroceso cuando entro en una p\xc3\xa1gina web que tiene anuncios en ventanas emergentes. Simplemente voy a la siguiente p\xc3\xa1gina encontrada por Google y espero encontrar algo menos irritante.',
b'Si quieres sonar como un hablante nativo, debes estar dispuesto a practicar diciendo la misma frase una y otra vez de la misma manera en que un m\xc3\xbasico de banjo practica el mismo fraseo una y otra vez hasta que lo puedan tocar correctamente y en el tiempo esperado.'],
dtype=object)>
<tf.Tensor: shape=(118964,), dtype=string, numpy=
array([b'Go.', b'Go.', b'Go.', ...,
b'A carbon footprint is the amount of carbon dioxide pollution that we produce as a result of our activities. Some people try to reduce their carbon footprint because they are concerned about climate change.',
b'Since there are usually multiple websites on any given topic, I usually just click the back button when I arrive on any webpage that has pop-up advertising. I just go to the next page found by Google and hope for something less irritating.',
b'If you want to sound like a native speaker, you must be willing to practice saying the same sentence over and over in the same way that banjo players practice the same phrase over and over until they can play it correctly and at the desired tempo.'],
dtype=object)>
WARNING:tensorflow:Inconsistent references when loading the checkpoint into this object graph. For example, in the saved checkpoint object, `model.layer.weight` and `model.layer_copy.weight` reference the same variable, while in the current object these are two different variables. The referenced variables are:(<keras.saving.legacy.saved_model.load.TensorFlowTransform>TransformFeaturesLayer object at 0x7fd3bfe86a30> and <keras.engine.input_layer.InputLayer object at 0x7fd53d278280>).
WARNING:tensorflow:Inconsistent references when loading the checkpoint into this object graph. For example, in the saved checkpoint object, `model.layer.weight` and `model.layer_copy.weight` reference the same variable, while in the current object these are two different variables. The referenced variables are:(<keras.saving.legacy.saved_model.load.TensorFlowTransform>TransformFeaturesLayer object at 0x7fd3bfe86a30> and <keras.engine.input_layer.InputLayer object at 0x7fd53d278280>).
2023-07-14 20:44:10.156792: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs_1' with dtype float and shape [?,?]
[[{{node inputs_1}}]]
2023-07-14 20:44:10.156895: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor '115617' with dtype float and shape [2048,128]
[[{{node 115617}}]]
2023-07-14 20:44:10.237320: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder_1' with dtype float and shape [?,?]
[[{{node Placeholder_1}}]]
2023-07-14 20:44:10.237433: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'transformer/115968' with dtype float and shape [2048,128]
[[{{node transformer/115968}}]]
# Run the exported 'train_step' signature on the first ten batches and show
# the returned metrics. '\r' with end='' rewrites the same console line on
# every step instead of printing a new one.
for index, (context_b, target_b) in raw_tensor_dataset.take(10).enumerate():
    metrics_dict = model_load.signatures['train_step'](
        context_tensor=context_b, target_tensor=target_b)
    print('\rStep: ', index, ', metrics: ', metrics_dict, end='', flush=True)
2023-07-14 20:45:27.488129: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization
2023-07-14 20:45:27.502371: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_5' with dtype string and shape [118964]
[[{{node Placeholder/_5}}]]
Step: tf.Tensor(9, shape=(), dtype=int64) , metrics: {'loss': <tf.Tensor: shape=(), dtype=float32, numpy=1.6589187>, 'masked_accuracy': <tf.Tensor: shape=(), dtype=float32, numpy=0.6777657>}}