set()操作
a = set('boy')
a
set(['y', 'b', 'o'])
Counter()用法
import collections
print(collections.Counter(['a', 'b', 'c', 'a', 'b', 'b']))
Counter({'b': 3, 'a': 2, 'c': 1})
items()遍历字典的方法
person = {'name': 'lizhong' }
for key,value in person.items():
    print('key = %s , value = %s' % (key, value))
得出 key = name , value = lizhong
random.random()方法
从0~1.0中取一个随机浮点数
random.randint(a,b)
从闭区间[a, b]中随机取一个整数(包含a和b)
list()方法
list(a)将元组a转换成列表
tf.random_uniform的使用
tf.random_uniform((4,4), minval = low, maxval = high, dtype = tf.float32)返回4*4矩阵,矩阵中的值在[low, high)区间内均匀分布(包含low,不包含high)。
tf.truncated_normal(shape, mean, stddev)这个函数产生截断的正态分布,均值(mean)和标准差(stddev)由自己决定;如果生成的值与均值的差值大于两倍的标准差,就丢弃该值并重新生成。
实现代码:
import random
from collections import Counter

import numpy as np
# Subsample the corpus: each word is dropped with probability
# p_drop = 1 - sqrt(threshold / freq(word)), so the more frequent a word is,
# the more often it is discarded. Words whose frequency is below `threshold`
# get a negative p_drop and are therefore always kept.
# NOTE(review): `int_words` and `threshold` are not defined in this snippet --
# presumably a list of integer word ids and a small float; confirm against the
# surrounding code.
word_counts = Counter(int_words)
total_count = len(int_words)
# Relative frequency of every distinct word. (Fixed: the original iterated
# over the undefined name `words_counts`, which raised NameError.)
freqs = {word: count / total_count for word, count in word_counts.items()}
p_drop = {word: 1 - np.sqrt(threshold / freqs[word]) for word in word_counts}
# Keep a word when the random draw falls below its keep probability.
train_words = [word for word in int_words if random.random() < (1 - p_drop[word])]
def get_target(words, idx, window_size=5):
    """Return the context words surrounding ``words[idx]``.

    A radius ``R`` is drawn uniformly from ``1..window_size`` and up to ``R``
    words on each side of position ``idx`` are collected; the element at
    position ``idx`` itself is skipped.

    Args:
        words: List of words. The two context slices are joined with ``+``,
            so the sequence must support concatenation (e.g. a list).
        idx: Position of the centre word in ``words``.
        window_size: Largest radius that may be drawn.

    Returns:
        A list of the distinct context words, in arbitrary order.
    """
    # np.random.randint excludes its upper bound, hence the +1.
    radius = np.random.randint(1, window_size + 1)
    # Clamp at 0: a negative start would be interpreted relative to the end
    # of the list instead of meaning "from the beginning".
    start = max(idx - radius, 0)
    stop = idx + radius
    # Slicing past the end of a list is safe, so `stop` needs no clamp.
    target_words = set(words[start:idx] + words[idx + 1:stop + 1])
    return list(target_words)
def get_batches(words, batch_size, window_size=5):
    """Yield ``(x, y)`` batches of aligned (input word, context word) pairs.

    ``words`` is cut into consecutive chunks of ``batch_size`` words. For
    every word in a chunk, its context words are obtained from
    ``get_target`` (looking only inside that chunk), and the input word is
    repeated once per context word so that ``x[i]`` pairs with ``y[i]``.

    Args:
        words: Sequence of words to batch; must support slicing.
        batch_size: Number of input words per batch.
        window_size: Maximum context radius, forwarded to ``get_target``.

    Yields:
        A tuple ``(x, y)`` of two equally long lists: input words and their
        corresponding context words.
    """
    n_batches = len(words) // batch_size
    # Drop the trailing words that do not fill a whole batch.
    words = words[:n_batches * batch_size]
    for idx in range(0, len(words), batch_size):
        x, y = [], []
        batch = words[idx:idx + batch_size]
        for ii, batch_x in enumerate(batch):
            batch_y = get_target(batch, ii, window_size)
            y.extend(batch_y)
            # Repeat the input word once per context word so x and y align.
            x.extend([batch_x] * len(batch_y))
        yield x, y