Andrew Ng Deep Learning, Course 5, Week 1: Dinosaurus Island -- Character level language model final - v1

import numpy as np
from random import shuffle
from utils import *  # assignment helpers: softmax, smooth, print_sample, get_initial_loss, initialize_parameters, rnn_forward, rnn_backward, update_parameters

def clip(gradients, maxValue):
    # Retrieve each gradient from the "gradients" dictionary
    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']
    ### START CODE HERE ###
    # clip to mitigate exploding gradients, loop over [dWax, dWaa, dWya, db, dby]. (≈2 lines)
    for gradient in [dWax, dWaa, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)
    ### END CODE HERE ###
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}
    return gradients
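
A quick sanity check for clip; the toy gradient shapes below are arbitrary and chosen only for illustration:

np.random.seed(3)
toy_gradients = {
    "dWax": np.random.randn(5, 3) * 10,   # arbitrary toy shapes
    "dWaa": np.random.randn(5, 5) * 10,
    "dWya": np.random.randn(2, 5) * 10,
    "db":   np.random.randn(5, 1) * 10,
    "dby":  np.random.randn(2, 1) * 10,
}
clipped = clip(toy_gradients, 10)
print(max(np.abs(g).max() for g in clipped.values()))  # every entry now lies in [-10, 10]
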
def sample(parameters, char_to_ix, seed):
    # Retrieve parameters and relevant shapes from "parameters" dictionary
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    ### START CODE HERE ###
    # Step 1: Create the one-hot vector x for the first character (initializing the sequence generation). (≈1 line)
    x = np.zeros((vocab_size,1))
    # Step 1': Initialize a_prev as zeros (≈1 line)
    a_prev = np.zeros((n_a,1))
    # Create an empty list that will contain the indices of the characters to generate (≈1 line)
    indices = []
    # idx detects the newline character; initialize it to -1 (not a valid index) so the loop starts
    idx = -1
    # Loop over time-steps t. At each time-step, sample a character from a probability distribution and append
    # its index to "indices". We'll stop if we reach 50 characters (which should be very unlikely with a well
    # trained model), which helps debugging and prevents entering an infinite loop.
    counter = 0
    newline_character = char_to_ix['\n']
    while (idx != newline_character and counter != 50):
        # Step 2: Forward propagate x using the equations (1), (2) and (3)
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)
        # for grading purposes
        np.random.seed(counter + seed)
        # Step 3: Sample the index of a character within the vocabulary from the probability distribution y
        idx = np.random.choice(range(len(y)), p=y.ravel())
        # Append the index to "indices"
        indices.append(idx)
        # Step 4: Overwrite the input character as the one corresponding to the sampled index.
        x = np.zeros((vocab_size,1))
        x[idx] = 1
        # Update "a_prev" to be "a"
        a_prev = a
        # for grading purposes
        seed += 1
        counter += 1
    ### END CODE HERE ###
    if (counter == 50):
        indices.append(char_to_ix['\n'])
    return indices
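
A toy call to sample() with freshly initialized (untrained) parameters, just to exercise the mechanics; a real run passes the parameters learned by model(). This assumes initialize_parameters from the course's utils module is available:

chars_demo = ["\n"] + list("abcdefghijklmnopqrstuvwxyz")   # same 27-character vocabulary as the assignment
char_to_ix_demo = {ch: i for i, ch in enumerate(chars_demo)}
params_demo = initialize_parameters(50, len(chars_demo), len(chars_demo))  # (n_a, n_x, n_y)
indices = sample(params_demo, char_to_ix_demo, seed=0)
print(indices)  # a list of character indices, ending with the index of '\n' (or cut off at 50)
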
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    ### START CODE HERE ###
    # Forward propagate through time (≈1 line)
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    # Backpropagate through time (≈1 line)
    gradients, a = rnn_backward(X, Y, parameters, cache)
    # Clip your gradients between -5 (min) and 5 (max) (≈1 line)
    gradients = clip(gradients, 5)
    # Update parameters (≈1 line)
    parameters = update_parameters(parameters, gradients, learning_rate)
    ### END CODE HERE ###
    return loss, gradients, a[len(X) - 1]
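
One illustrative optimization step on a hand-made (X, Y) pair; the indices below are arbitrary and only meant to show the expected input format (None marks the zero input vector at t=0, and Y is X shifted left, terminated by the index of '\n'):

np.random.seed(1)
params_demo = initialize_parameters(50, 27, 27)      # (n_a, n_x, n_y) for the 27-character vocabulary
a_prev_demo = np.zeros((50, 1))
X_demo = [None, 12, 3, 5, 11, 22, 3]                 # arbitrary character indices
Y_demo = [12, 3, 5, 11, 22, 3, 0]                    # X shifted by one, ending with '\n' (index 0)
loss_demo, grads_demo, a_last = optimize(X_demo, Y_demo, a_prev_demo, params_demo, learning_rate=0.01)
print(loss_demo)
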
def model(data, ix_to_char, char_to_ix, num_iterations = 35000, n_a = 50, dino_names = 7, vocab_size = 27):
    # Retrieve n_x and n_y from vocab_size
    n_x, n_y = vocab_size, vocab_size
    # Initialize parameters
    parameters = initialize_parameters(n_a, n_x, n_y)
    # Initialize loss (this is required because we want to smooth our loss, don't worry about it)
    loss = get_initial_loss(vocab_size, dino_names)
    # Build list of all dinosaur names (training examples).
    with open("dinos.txt") as f:
        examples = f.readlines()
    examples = [x.lower().strip() for x in examples]
    # Shuffle list of all dinosaur names
    shuffle(examples)
    # Initialize the hidden state of your RNN
    a_prev = np.zeros((n_a, 1))
    # Optimization loop
    for j in range(num_iterations):
        ### START CODE HERE ###
        # Use the hint above to define one training example (X,Y) (≈ 2 lines)
        index = j % len(examples)
        X = [None] + [char_to_ix[ch] for ch in examples[index]]
        Y = X[1:] + [char_to_ix["\n"]]
        # Perform one optimization step: Forward-prop -> Backward-prop -> Clip -> Update parameters
        # Choose a learning rate of 0.01
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters)
        ### END CODE HERE ###
        # Keep an exponential moving average of the loss so the printed values are less noisy (this does not change the optimization itself).
        loss = smooth(loss, curr_loss)
        # Every 2000 iterations, generate a few names with sample() to check whether the model is learning properly
        if j % 2000 == 0:
            print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
            # The number of dinosaur names to print
            seed = 0
            for name in range(dino_names):
                # Sample indices and print them
                sampled_indices = sample(parameters, char_to_ix, seed)
                print_sample(sampled_indices, ix_to_char)
                seed += 1  # To get the same result for grading purposes, increment the seed by one.
            print('\n')
    return parameters
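
A typical invocation, following the notebook's setup cells: build the vocabulary from dinos.txt, then train and let the model print sampled names every 2000 iterations:

data = open("dinos.txt", "r").read().lower()
chars = sorted(list(set(data)))                      # 26 letters plus '\n'
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
parameters = model(data, ix_to_char, char_to_ix)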

Result (from the optional "Writing like Shakespeare" poem generator at the end of the same assignment):

Here is your poem: 

Forsooth this maketh no sense  hin the wire,
i ende that hy hary cume canctore,
sure mont of which relaven to be besing,
and eye with hist horoure you doth thy sided's frved,
have casy thou wirt wateath of this conness,
dide yours denerted beaution, to the my and ell fites,
thou taken both worse inensefted bitpy yous,,
 
sray i ipenthanders shill to pizino breased.
and care thou hist' owr that ever so nas,
by suce your chnect w

 
