The FizzBuzz game
Multiples of 3 print "fizz", multiples of 5 print "buzz", multiples of 15 print "fizzbuzz"; every other number prints itself. Expected output for 1 through 15:
1
2
fizz
4
buzz
fizz
7
8
fizz
buzz
11
fizz
13
14
fizzbuzz
def fizz_buzz_encode(i):
    if i % 15 == 0: return 3
    elif i % 5 == 0: return 2
    elif i % 3 == 0: return 1
    else: return 0

def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i, fizz_buzz_encode(i)))

for i in range(1, 16):
    helper(i)
Now have a neural network learn this game on its own.
import torch
import numpy as np
import torch.nn as nn
def fizz_buzz_encoder(i):
    if i % 15 == 0: return 3
    elif i % 5 == 0: return 2
    elif i % 3 == 0: return 1
    else: return 0
NUM_DIGITS = 10
NUM_HIDDEN = 100
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)][::-1])  # the input as num_digits binary bits, most significant first
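A quick sanity check (this example is an addition, not in the original notes):

print(binary_encode(3, NUM_DIGITS))  # -> [0 0 0 0 0 0 0 0 1 1]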
# Training data: the numbers from 101 up to 2**NUM_DIGITS - 1 (10-bit binary input -> 4-way classification)
trX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DIGITS)])
trY = torch.LongTensor([fizz_buzz_encoder(i) for i in range(101, 2 ** NUM_DIGITS)])
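A quick shape check (illustrative, not part of the original notes): 2**10 = 1024, so the training set has 1024 - 101 = 923 rows of 10 bits each.

print(trX.shape, trY.shape)  # torch.Size([923, 10]) torch.Size([923])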
model = torch.nn.Sequential(
    torch.nn.Linear(NUM_DIGITS, NUM_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(NUM_HIDDEN, 4),  # 4 output classes: number / fizz / buzz / fizzbuzz
)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
BATCH_SIZE = 50

for epoch in range(500):
    for start in range(0, len(trX), BATCH_SIZE):
        end = start + BATCH_SIZE
        batchX = trX[start:end]
        batchY = trY[start:end]
        y_pred = model(batchX)
        loss = loss_fn(y_pred, batchY)
        print("EPOCH", epoch, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Test data: the numbers below 101, which the model never saw during training
testX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(1, 101)])
with torch.no_grad():
    testY = model(testX)
def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

# testY is a 100x4 matrix of scores; pick the highest-scoring class per row.
# max(1) returns (values, indices) along dim 1, and [1] takes the indices, i.e. the argmax.
predicts = zip(range(1, 101), testY.max(1)[1].tolist())
print([fizz_buzz_decode(i, x) for i, x in predicts])
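To quantify how well the network learned the rule, we can compare its predictions against the true labels. This check is an addition, not part of the original notes:

actual = np.array([fizz_buzz_encoder(i) for i in range(1, 101)])
predicted = testY.max(1)[1].numpy()
print("accuracy on 1..100:", (actual == predicted).mean())  # fraction of correct predictions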
Word vectors (word embeddings)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as tud
import pandas as pd
from collections import Counter
import numpy as np
import random
import math
import sklearn
import scipy
from sklearn.metrics.pairwise import cosine_similarity
# Fix the random seeds so every run draws the same random numbers; the argument is the seed value.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
# Hyperparameters
C = 3                   # context window: C words on each side of the center word
K = 100                 # number of negative samples drawn per positive word
NUM_EPOCHS = 2
MAX_VOCAB_SIZE = 30000  # vocabulary size
BATCH_SIZE = 128
LEARNING_RATE = 0.2
EMBEDDING_SIZE = 100
def word_tokenize(text):
    return text.split()

with open("text.train.txt", "r") as fin:
    text = fin.read()
text = word_tokenize(text)

# Keep the MAX_VOCAB_SIZE - 1 most common words; all remaining words are pooled under "<uk>"
vocab = dict(Counter(text).most_common(MAX_VOCAB_SIZE - 1))
vocab["<uk>"] = len(text) - np.sum(list(vocab.values()))  # total count of out-of-vocabulary words
idx_to_word = list(vocab.keys())  # must be a list, not a set: index -> word lookups need a stable order
word_to_idx = {word: i for i, word in enumerate(idx_to_word)}
# Turn the unigram counts into the negative-sampling distribution,
# raised to the 3/4 power as in the word2vec paper
word_counts = np.array([count for count in vocab.values()], dtype=np.float32)
word_freqs = word_counts / np.sum(word_counts)
word_freqs = word_freqs ** (3. / 4.)
word_freqs = word_freqs / np.sum(word_freqs)
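The 3/4 power flattens the unigram distribution, so rare words are drawn as negative samples more often than their raw frequency would allow. A toy illustration (values assumed, not from the source):

f = np.array([0.9, 0.1], dtype=np.float32)
f = f ** (3. / 4.)
print(f / f.sum())  # ~[0.84 0.16]: the rare word's share rises from 0.10 to about 0.16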
VOCAB_SIZE = len(idx_to_word)
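The notes stop before defining how training examples are produced, so the training loop below assumes a minimal skip-gram dataset. This sketch, including the WordEmbeddingDataset name, the wrap-around window, and the dataloader variable, is an assumption rather than part of the source:

class WordEmbeddingDataset(tud.Dataset):
    def __init__(self, text, word_to_idx, word_freqs):
        # encode the corpus as word indices; unknown words map to "<uk>"
        self.text_encoded = torch.LongTensor(
            [word_to_idx.get(w, word_to_idx["<uk>"]) for w in text])
        self.word_freqs = torch.Tensor(word_freqs)

    def __len__(self):
        return len(self.text_encoded)

    def __getitem__(self, idx):
        center = self.text_encoded[idx]
        # the C words on each side of the center word, wrapping around the corpus
        pos_indices = [(idx + i) % len(self.text_encoded)
                       for i in list(range(-C, 0)) + list(range(1, C + 1))]
        pos_words = self.text_encoded[pos_indices]
        # K negative samples per positive word, drawn from the 3/4-power distribution
        neg_words = torch.multinomial(self.word_freqs, K * pos_words.shape[0], True)
        return center, pos_words, neg_words

dataset = WordEmbeddingDataset(text, word_to_idx, word_freqs)
dataloader = tud.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)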
class WordEmbeddingModel(nn.Module):
    def __init__(self, vocab_size, embed_size):
        super().__init__()
        self.in_embed = nn.Embedding(vocab_size, embed_size)   # center-word vectors
        self.out_embed = nn.Embedding(vocab_size, embed_size)  # context-word vectors

    def forward(self, center, pos_words, neg_words):
        # center: (B,), pos_words: (B, 2C), neg_words: (B, 2C*K)
        center_embed = self.in_embed(center)
        pos_score = torch.bmm(self.out_embed(pos_words), center_embed.unsqueeze(2)).squeeze(2)
        neg_score = torch.bmm(self.out_embed(neg_words), -center_embed.unsqueeze(2)).squeeze(2)
        # skip-gram with negative sampling: pull positive pairs together, push negatives apart
        return -(F.logsigmoid(pos_score).sum(1) + F.logsigmoid(neg_score).sum(1))

model = WordEmbeddingModel(VOCAB_SIZE, EMBEDDING_SIZE)
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
for e in range(NUM_EPOCHS):
    for center, pos_words, neg_words in dataloader:
        optimizer.zero_grad()
        loss = model(center, pos_words, neg_words).mean()
        loss.backward()
        optimizer.step()
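The imports include sklearn's cosine_similarity, but the notes never use it. A hedged sketch of the intended nearest-neighbor lookup over the trained embeddings (the find_nearest helper is an assumption, not from the source):

embedding_weights = model.in_embed.weight.detach().numpy()

def find_nearest(word, k=5):
    # rank every vocabulary word by cosine similarity to the query word's vector
    vec = embedding_weights[word_to_idx[word]].reshape(1, -1)
    sims = cosine_similarity(vec, embedding_weights)[0]
    return [idx_to_word[i] for i in sims.argsort()[::-1][:k]]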