Preface
Two common one-liners for turning raw Python data into framework tensors, in TensorFlow and PyTorch respectively:

tf.convert_to_tensor(my_np_array, dtype=tf.float32)
torch.FloatTensor(py_list)
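A minimal, self-contained sketch of both conversions (the array and list below are made-up example data, not from the dataset used later):

import numpy as np
import tensorflow as tf
import torch

my_np_array = np.array([[1.0, 2.0], [3.0, 4.0]])  # hypothetical example data
py_list = [1.0, 2.0, 3.0]

tf_tensor = tf.convert_to_tensor(my_np_array, dtype=tf.float32)  # tf.Tensor, shape (2, 2)
torch_tensor = torch.FloatTensor(py_list)                        # torch.Tensor, shape (3,)
print(tf_tensor.shape, torch_tensor.shape)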
import pandas as pd

def file_process(file_path):
    # Each line is expected to be "<label> <text>".
    labels = []
    texts = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Split on the first space only, so the text itself may contain spaces.
            label, text = line.rstrip('\n').split(' ', 1)
            labels.append(label)
            texts.append(text)
    return labels, texts

def describe_file(file_path):
    labels, texts = file_process(file_path)
    file_df = pd.DataFrame({'label': labels, 'text': texts})
    # Record each text's length as a column for quick inspection.
    file_df['length'] = file_df['text'].apply(len)
    return file_df

test_df = describe_file(path)
print(test_df)
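As a quick sanity check, a throwaway file in the format file_process expects (one "label text" pair per line; the file name and contents below are made up) can be round-tripped through describe_file:

with open('sample.txt', 'w', encoding='utf-8') as f:  # hypothetical file
    f.write('0 the market closed lower today\n')
    f.write('1 the team won its third straight game\n')
print(describe_file('sample.txt'))  # DataFrame with columns: label, text, length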
!pip install transformers
from transformers import BertTokenizer, TFBertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = TFBertModel.from_pretrained('bert-base-uncased')

test_text = list(test_df['text'])
# truncation=True keeps sequences within BERT's 512-token limit.
encoded_input = tokenizer(test_text, padding=True, truncation=True, return_tensors='tf')
output = model(encoded_input)
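A quick way to see what the tokenizer and model actually return is to print the shapes (assuming the setup above; 768 is the hidden size of bert-base-uncased):

print(encoded_input['input_ids'].shape)   # (num_texts, max_seq_len)
print(output['last_hidden_state'].shape)  # (num_texts, max_seq_len, 768), per-token vectors
print(output['pooler_output'].shape)      # (num_texts, 768), one pooled vector per text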
import tensorflow as tf

# The pooled BERT vectors are the classifier's inputs (features) and the
# integer labels are its targets, so name them x and y accordingly.
x_train = output['pooler_output']
y_train = tf.convert_to_tensor([int(label) for label in test_df['label']], dtype=tf.float32)
print(x_train.shape, len(y_train))
print(type(x_train), type(y_train))
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
def build_classifier_model():
    x_input = Input(shape=(768,))  # one pooled BERT vector per example
    x_hidden = Dense(4, activation='relu')(x_input)
    # A single softmax unit always outputs 1.0 and cannot learn; use a sigmoid
    # unit for binary labels (for K classes, use Dense(K, activation='softmax')).
    x_out = Dense(1, activation='sigmoid')(x_hidden)
    return Model(x_input, x_out)

classifier_model = build_classifier_model()
classifier_model.summary()
# binary_crossentropy matches the sigmoid head above (with a K-way softmax
# head, use sparse_categorical_crossentropy for integer labels instead).
classifier_model.compile(loss='binary_crossentropy',
                         optimizer=Adam(),
                         metrics=['accuracy'])
classifier_model.fit(x_train, y_train, epochs=2)
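After training, the same head can produce predictions. A short sketch (in practice you would predict on a held-out split rather than the training features):

probs = classifier_model.predict(x_train)  # numpy array, shape (num_texts, 1)
preds = (probs > 0.5).astype(int).ravel()  # threshold the sigmoid outputs
print(preds[:10])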