1.保存模型
from transformers import TFBertForSequenceClassification
model = TFBertForSequenceClassification.from_pretrained("nateraw/bert-base-uncased-imdb", from_pt=True)
# the saved_model parameter is a flag to create a SavedModel version of the model in same time than the h5 weights
model.save_pretrained("my_model", saved_model=True)
2.使用docker拉取tf-serving并制作serving
3.使用requests调用Sevring
from transformers import BertTokenizerFast, BertConfig
import requests
import json
import numpy as np
sentence = "I love the new TensorFlow update in transformers."
# Load the corresponding tokenizer of our SavedModel
tokenizer = BertTokenizerFast.from_pretrained("nateraw/bert-base-uncased-imdb")
# Load the model config of our SavedModel
config = BertConfig.from_pretrained("nateraw/bert-base-uncased-imdb")
# Tokenize the sentence
batch = tokenizer(sentence)
# Convert the batch into a proper dict
batch = dict(batch)
# Put the example into a list of size 1, that corresponds to the batch size
batch = [batch]
# The REST API needs a JSON that contains the key instances to declare the examples to process
input_data = {"instances": batch}
# Query the REST API, the path corresponds to http://host:port/model_version/models_root_folder/model_name:method
r = requests.post("http://localhost:8501/v1/models/bert:predict", data=json.dumps(input_data))
# Parse the JSON result. The results are contained in a list with a root key called "predictions"
# and as there is only one example, takes the first element of the list
result = json.loads(r.text)["predictions"][0]
# The returned results are probabilities, that can be positive or negative hence we take their absolute value
abs_scores = np.abs(result)
# Take the argmax that correspond to the index of the max probability.
label_id = np.argmax(abs_scores)
# Print the proper LABEL with its index
print(config.id2label[label_id])