I. Data preprocessing
0. Download the data
The dataset is a corpus of Spanish-English sentence pairs for translation.
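The download step itself is not shown in the post. Below is a minimal sketch, assuming the TensorFlow-hosted spa-eng corpus; the URL and the target location ./data_spa_en/spa.txt are assumptions, so adjust them if you use another mirror.

# Hypothetical download step: fetch and unpack the spa-eng corpus.
import os
import tensorflow as tf

path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip',
    origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
    extract=True)
# The archive unpacks next to the zip as spa-eng/spa.txt; copy or move that file
# to ./data_spa_en/spa.txt so the code below runs unchanged.
print(os.path.join(os.path.dirname(path_to_zip), 'spa-eng', 'spa.txt'))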
1. Import packages and enable GPU memory growth
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import time
from tensorflow import keras
import unicodedata
import re
from sklearn.model_selection import train_test_split

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
2. Strip Spanish accent marks
def unicode_to_ascii(s):
    """Convert a Spanish sentence from Unicode to ASCII by dropping accent (combining) marks."""
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                   if unicodedata.category(c) != 'Mn')

en_sentence = u"May I borrow this book?"
sp_sentence = u"¿Puedo tomar prestado este libro?"
print(unicode_to_ascii(en_sentence))
print(unicode_to_ascii(sp_sentence))
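To see why filtering on the 'Mn' (nonspacing mark) category strips accents: NFD normalization splits an accented letter into a base letter plus a combining mark, and only the mark carries the 'Mn' category. A quick illustrative check:

# 'é' decomposes under NFD into 'e' (category Ll) followed by the combining
# acute accent U+0301 (category Mn), which unicode_to_ascii drops.
for c in unicodedata.normalize('NFD', u'é'):
    print(repr(c), unicodedata.category(c))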
3. Preprocess a single English or Spanish sentence
def preprocess_sentence(w):
    """Preprocess a single English or Spanish sentence:
    1. add spaces around punctuation;
    2. collapse repeated whitespace;
    3. replace every character that is not a letter or allowed punctuation with a space;
    4. wrap the sentence with <start> and <end> tokens.
    """
    w = unicode_to_ascii(w.lower().strip())
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    w = w.strip()
    w = '<start> ' + w + ' <end>'
    return w

print(preprocess_sentence(en_sentence))
print(preprocess_sentence(sp_sentence).encode('utf-8'))
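With these rules, the two sample sentences should print roughly as follows (the Spanish one shown before the .encode('utf-8') call):

# <start> may i borrow this book ? <end>
# <start> ¿ puedo tomar prestado este libro ? <end>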
4. Read the whole corpus, preprocess it sentence by sentence, and separate English from Spanish
"""
# 1. Remove the accents
# 2. Clean the sentences
# 3. Return word pairs in the format: [ENGLISH, SPANISH]
"""
data_path = './data_spa_en/spa.txt'
def create_dataset ( path, num_examples) :
lines = open ( path, encoding= 'UTF-8' ) . read( ) . strip( ) . split( '\n' )
word_pairs = [ [ preprocess_sentence( w) for w in l. split( '\t' ) ] for l in lines[ : num_examples] ]
return zip ( * word_pairs)
en, sp = create_dataset( data_path, None )
print ( en[ - 1 ] )
print ( sp[ - 1 ] )
5. Load the data, build a Tokenizer for each language, and pad the id sequences
def max_length(tensor):
    """Return the longest sequence length in the dataset (used as the padding length)."""
    return max(len(t) for t in tensor)

def tokenize(lang):
    """Fit a Tokenizer on the texts, convert them to id sequences, and pad them."""
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    lang_tokenizer.fit_on_texts(lang)
    tensor = lang_tokenizer.texts_to_sequences(lang)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')
    return tensor, lang_tokenizer

def load_dataset(path, num_examples=None):
    """Load the data, tokenize it, and pad the sequences."""
    # Pairs are [ENGLISH, SPANISH]: English is the target, Spanish the input.
    targ_lang, inp_lang = create_dataset(path, num_examples)
    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)
    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

num_examples = 30000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(data_path, num_examples)
max_length_targ, max_length_inp = max_length(target_tensor), max_length(input_tensor)
6. Split into training and validation sets
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
    input_tensor, target_tensor, test_size=0.2)
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))
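With num_examples = 30000 and test_size=0.2, this prints 24000 24000 6000 6000, i.e. an 80/20 train/validation split.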
7. Spot-check the id-to-word conversion
def convert(lang, tensor):
    for t in tensor:
        if t != 0:
            print("%d ----> %s" % (t, lang.index_word[t]))

print("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print()
print("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])
8. Wrap the training set in a tf.data.Dataset
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train) // BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
9. Inspect one batch
example_input_batch, example_target_batch = next(iter(dataset))
print(example_input_batch.shape, example_target_batch.shape)
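Both shapes start with BATCH_SIZE = 64; the second dimension is the padded sentence length of each language (max_length_inp for the inputs, max_length_targ for the targets).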
II. Build the Encoder
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, encoding_units, batch_size):
        super(Encoder, self).__init__()
        self.batch_size = batch_size
        self.encoding_units = encoding_units
        self.embedding = keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences gives the per-step outputs for attention,
        # return_state gives the final hidden state for the decoder.
        self.gru = keras.layers.GRU(self.encoding_units,
                                    return_sequences=True,
                                    return_state=True,
                                    recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_size, self.encoding_units))

encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
III. Build the attention layer
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query: decoder hidden state (batch, units); values: encoder outputs (batch, seq_len, units).
        hidden_with_time_axis = tf.expand_dims(query, 1)
        print(values.shape)
        print(hidden_with_time_axis.shape)
        print(self.W1(values).shape)
        print(self.W2(hidden_with_time_axis).shape)
        temp = self.W1(values) + self.W2(hidden_with_time_axis)
        print(temp.shape)
        print(tf.nn.tanh(temp).shape)
        # Additive (Bahdanau) score: V(tanh(W1*values + W2*query)), shape (batch, seq_len, 1).
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        print(score.shape)
        # Softmax over axis 1, the encoder time axis.
        attention_weights = tf.nn.softmax(score, axis=1)
        print(attention_weights.shape)
        print(values.shape)
        # Context vector: attention-weighted sum of the encoder outputs.
        context_vector = attention_weights * values
        print(context_vector.shape)
        context_vector = tf.reduce_sum(context_vector, axis=1)
        print(context_vector.shape)
        return context_vector, attention_weights

attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)
print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))
IV. Build the Decoder
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, decoding_units, batch_size):
        super(Decoder, self).__init__()
        self.batch_size = batch_size
        self.decoding_units = decoding_units
        self.embedding = keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = keras.layers.GRU(self.decoding_units,
                                    return_sequences=True,
                                    return_state=True,
                                    recurrent_initializer='glorot_uniform')
        self.fc = keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.decoding_units)

    def call(self, x, hidden, encoding_output):
        # Attend over the encoder outputs with the current decoder hidden state.
        context_vector, attention_weights = self.attention(hidden, encoding_output)
        x = self.embedding(x)
        # Concatenate the context vector with the embedded input token.
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        return x, state, attention_weights

decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)
sample_decoder_output, _, _ = decoder(tf.random.uniform((64, 1)),
                                      sample_hidden, sample_output)
print('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))
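Because the decoder is called with a single time step here (an input of shape (64, 1)), the reshape followed by the final Dense layer gives an output of shape (batch_size, vocab_size), i.e. one distribution over the target vocabulary per example.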
V. Define the loss function
optimizer = keras.optimizers.Adam()
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def loss_function(real, pred):
    # Mask out padded positions (id 0) so they do not contribute to the loss.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
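A quick illustration of the padding mask on a toy target column (values invented for illustration):

# Toy example: positions where real == 0 (padding) contribute no loss.
real = tf.constant([2, 5, 0, 0], dtype=tf.int64)
mask = tf.math.logical_not(tf.math.equal(real, 0))
print(mask.numpy())  # [ True  True False False]
# After the cast, the per-position losses at the padded slots are multiplied by 0.
# Note that tf.reduce_mean still divides by the full batch size, padded positions included.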
VI. Define the single-step training function
@tf.function
def train_step(inp, targ, encoding_hidden):
    loss = 0
    with tf.GradientTape() as tape:
        encoding_output, encoding_hidden = encoder(inp, encoding_hidden)
        decoding_hidden = encoding_hidden
        decoding_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
        for t in range(1, targ.shape[1]):
            predictions, decoding_hidden, _ = decoder(decoding_input, decoding_hidden, encoding_output)
            loss += loss_function(targ[:, t], predictions)
            # Teacher forcing: feed the ground-truth token as the next decoder input.
            decoding_input = tf.expand_dims(targ[:, t], 1)
    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss
VII. Train the model
EPOCHS = 10

def train_model(EPOCHS):
    for epoch in range(EPOCHS):
        start = time.time()
        encoding_hidden = encoder.initialize_hidden_state()
        total_loss = 0
        for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = train_step(inp, targ, encoding_hidden)
            total_loss += batch_loss
            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss.numpy()))
        # Save a checkpoint every 2 epochs.
        if (epoch + 1) % 2 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)
        print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
VIII. Save and restore the model with checkpoints
checkpoint_dir = './checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

if not os.path.exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)
    train_model(EPOCHS)
else:
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
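Note the control flow: training only runs when ./checkpoints does not exist yet; if the directory is already there, the latest checkpoint is restored instead. Delete or rename the directory to retrain from scratch.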
IX. Implement inference
def evaluate(sentence):
    attention_plot = np.zeros((max_length_targ, max_length_inp))
    sentence = preprocess_sentence(sentence)
    inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
    inputs = keras.preprocessing.sequence.pad_sequences([inputs], maxlen=max_length_inp, padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''
    hidden = [tf.zeros((1, units))]
    encoding_out, encoding_hidden = encoder(inputs, hidden)
    decoding_hidden = encoding_hidden
    decoding_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

    for t in range(max_length_targ):
        predictions, decoding_hidden, attention_weights = decoder(
            decoding_input, decoding_hidden, encoding_out)
        # Store this step's attention weights for plotting later.
        attention_weights = tf.reshape(attention_weights, (-1,))
        attention_plot[t] = attention_weights.numpy()
        # Greedy decoding: pick the most likely token at each step.
        predicted_id = tf.argmax(predictions[0]).numpy()
        result += targ_lang.index_word[predicted_id] + ' '
        if targ_lang.index_word[predicted_id] == '<end>':
            return result, sentence, attention_plot
        # Feed the predicted id back as the next decoder input.
        decoding_input = tf.expand_dims([predicted_id], 0)
    return result, sentence, attention_plot
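evaluate looks each input word up directly in inp_lang.word_index, so a word that never appeared in the training slice raises a KeyError. One possible guard, not part of the original code, is to fall back to the padding id:

# Hypothetical OOV guard: unknown words map to 0, the padding id.
inputs = [inp_lang.word_index.get(i, 0) for i in sentence.split(' ')]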
X. Plot the attention weights
def plot_attention(attention, sentence, predicted_sentence):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')
    fontdict = {'fontsize': 14}
    ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)
    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)
    plt.show()
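On recent matplotlib versions, setting tick labels without pinning the tick positions can misalign the labels or trigger a warning. One common fix is to place one tick per matrix cell by adding the lines below inside plot_attention, right after the matshow call (an optional tweak, not part of the original code):

import matplotlib.ticker as ticker
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))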
XI. Use the model to translate a single sentence
def translate(sentence):
    result, sentence, attention_plot = evaluate(sentence)
    print('Input: %s' % (sentence))
    print('Predicted translation: {}'.format(result))
    attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
    plot_attention(attention_plot, sentence.split(' '), result.split(' '))
XII. Test the model on a few sentences
translate(u'hace mucho frio aqui.')
translate(u'esta es mi vida.')
translate(u'trata de averiguarlo.')
XIII. Complete code
The complete script is all of the code from sections I through XII above, collected in one place.