hangman游戏

最新推荐文章于 2022-11-15 11:11:27 发布

王天平

最新推荐文章于 2022-11-15 11:11:27 发布

阅读量3.5k

点赞数 3

本文链接：https://blog.csdn.net/weixin_42327556/article/details/103285869

版权

挑战内容是编写一个算法来玩 Hangman（猜单词）游戏。你的算法应当优于我们提供的 baseline 算法，理想情况下准确率超过 50%。随邮件附上了训练用的字典文件，以及一个 Jupyter 笔记本模板，其中演示了如何编写、运行并提交你的算法。

游戏规则：给定单词长度，每次猜一个字母；猜对的字母会保留并显示在对应位置，猜错一次则用掉一条命，每个单词共有六条命。

模型背景，已知游戏规则，还有一个单词字典作为训练集。

这个游戏有两种思路：一种比较依赖 GPU 等硬件，即用 RNN 在字典上训练模型来做 NLP 预测；另一种是基于游戏规则，从训练字典中提取统计信息，再利用这些先验信息去预测字典之外的单词。由于硬件限制，我采用的是后者：以单词长度、已猜对字母所在的位置、尚未猜出的字母所在的位置等作为特征进行训练。

// An highlighted block

 #my code
def update_counter(lm, data, order):
    """Accumulate next-character counts for every ``order``-length context.

    A window slides over ``data``: the ``order`` characters at each position
    form the context and the character immediately after them is the observed
    successor. Windows that touch a space (inside the context or as the
    successor) are skipped.

    Args:
        lm: Nested counter indexed as ``lm[context][char]``; it must create
            missing entries on access (e.g. ``defaultdict(Counter)``).
            Mutated in place.
        data: Text to scan.
        order: Number of characters in each context.

    Returns:
        The same ``lm`` object that was passed in.
    """
    for start in range(len(data) - order):
        stop = start + order
        context, following = data[start:stop], data[stop]
        if ' ' in context or following == ' ':
            continue
        lm[context][following] += 1
    return lm

def train_char_lm(dic, order=3):
    """Build a masked n-gram letter model from a space-separated word string.

    Every ``order``-character window of ``dic`` that contains neither a space
    nor an underscore is expanded into all patterns obtained by masking
    between 1 and ``order - 1`` of its positions with ``'_'``. For each
    pattern, one hidden letter is counted: the most frequent among the masked
    letters, with ties going to the left-most one.

    Args:
        dic: Training text, words separated by single spaces.
        order: Window (gram) length.

    Returns:
        A dict mapping each masked pattern to a list of ``(letter, prob)``
        tuples, where ``prob`` is the letter's share of that pattern's counts.
    """
    lm = defaultdict(Counter)
    data = dic
    # "+ 1" so the window ending on the final character is included; without
    # it the last gram of the last word is never seen.
    for start in range(len(data) - order + 1):
        gram = data[start:start + order]
        if ' ' in gram or '_' in gram:
            # Window spans a word boundary or already contains the mask symbol.
            continue
        # Mask order-1 positions first, then fewer, down to a single position.
        for n_masked in range(order - 1, 0, -1):
            for positions in combinations(range(len(gram)), n_masked):
                masked = set(positions)
                history = "".join(
                    '_' if idx in masked else ch for idx, ch in enumerate(gram)
                )
                hidden = [ch for idx, ch in enumerate(gram) if idx in masked]
                # most_common() is a stable sort, so ties resolve to the
                # left-most hidden letter.
                char = Counter(hidden).most_common()[0][0]
                lm[history][char] += 1

    def normalize(counter):
        # Turn raw counts into relative frequencies.
        s = float(sum(counter.values()))
        return [(c, cnt / s) for c, cnt in counter.items()]

    outlm = {hist: normalize(chars) for hist, chars in lm.items()}
    return outlm
def check_guessed(sorted_letter_count, guessed_letters):
    """Return the first candidate whose letter has not been guessed yet.

    Args:
        sorted_letter_count: Iterable of ``(letter, score)`` pairs, already
            ordered from most to least preferred.
        guessed_letters: Collection of letters that were already tried.

    Returns:
        The first ``(letter, score)`` pair whose letter is neither the mask
        symbol ``'_'`` nor in ``guessed_letters``; ``None`` if there is none.
    """
    for letter, instance_prob in sorted_letter_count:
        # Compare by value: ``is not '_'`` tests object identity, which only
        # works by accident of string interning.
        if letter != '_' and letter not in guessed_letters:
            return (letter, instance_prob)
    return None

def update_candidate(candidate_list, curr_guess):
    """Add one proposed guess to the running per-letter score table.

    Args:
        candidate_list: Dict mapping letter -> accumulated score; mutated in
            place.
        curr_guess: ``(letter, score)`` pair, or ``None`` to do nothing.

    Returns:
        The same ``candidate_list`` object.
    """
    if curr_guess is None:
        return candidate_list

    letter = curr_guess[0]
    if letter not in candidate_list:
        # First vote for this letter.
        candidate_list[letter] = curr_guess[1]
    else:
        candidate_list[letter] += curr_guess[1]
    return candidate_list

def generate_letter(full_dict, guess, lm, d, guessed_letters, order = 3):
    """Choose the next letter to guess for the current pattern.

    Scoring works in stages:

    1. If nothing has been revealed yet, propose the most frequent unguessed
       letter among training words of the same length.
    2. For every ``order``-length slice of ``guess`` that is a key of ``lm``,
       take that slice's most probable unguessed letter and add its
       probability to the letter's score.
    3. If no candidate was produced, fall back to the same-length letter
       frequency, and then to the letter frequency pooled over all lengths.

    Args:
        full_dict: Unused; kept so existing callers keep working.
        guess: Current pattern, one character per position, ``'_'`` for
            unknown positions.
        lm: Mapping of masked pattern -> list of ``(letter, prob)`` pairs.
        d: Mapping of word length -> list of training words of that length.
        guessed_letters: Letters that have already been tried.
        order: Slice length used to look patterns up in ``lm``.

    Returns:
        The letter with the highest accumulated score.

    Raises:
        ValueError: If every letter known from ``lm`` and ``d`` has already
            been guessed, so there is nothing left to propose.
    """
    candidate_list = {}
    print (guess)

    def length_prior():
        # .get() tolerates a plain dict and a word length that never occurred
        # in training (and avoids inserting keys into a defaultdict).
        same_length_words = d.get(len(guess), [])
        letter_counts = collections.Counter("".join(same_length_words)).most_common()
        return check_guessed(letter_counts, guessed_letters)

    ## the first guess is based on letter frequency over all words with the
    ## length of the target word
    if guess and set(guess) == {'_'}:
        candidate_list = update_candidate(candidate_list, length_prior())

    ## slide over the pattern and collect the language model's best unguessed
    ## letter for every slice it knows
    for i in range(len(guess)):
        stem = guess[i:i+order]
        if stem in lm:
            ranked = sorted(lm[stem], key=lambda item: item[1], reverse=True)
            candidate_list = update_candidate(candidate_list, check_guessed(ranked, guessed_letters))

    ## no slice matched: use the letter frequency of same-length words
    if not candidate_list:
        candidate_list = update_candidate(candidate_list, length_prior())

    ## still nothing (e.g. a word length absent from the training data):
    ## use the letter frequency pooled over every length
    if not candidate_list:
        pooled = collections.Counter()
        for words in d.values():
            pooled.update("".join(words))
        candidate_list = update_candidate(
            candidate_list, check_guessed(pooled.most_common(), guessed_letters)
        )

    if not candidate_list:
        # max() on an empty dict would raise an opaque ValueError; be explicit.
        raise ValueError("no unguessed letter left to propose for pattern %r" % (guess,))

    letter = max(candidate_list, key=candidate_list.get)
    return letter

def play(answer, lm,  d, order, nTrials=6):
    """Play one hangman round against ``answer`` and report the outcome.

    ``answer`` is expected in spaced form, one letter followed by one space
    per position (e.g. ``"c a t "``). The working pattern ``guess`` uses the
    same layout with ``'_'`` for unknown letters; the compact pattern passed
    to ``generate_letter`` takes every second character of it.

    Args:
        answer: Target word in spaced form.
        lm: Language model forwarded to ``generate_letter``.
        d: Length -> words mapping forwarded to ``generate_letter``.
        order: Slice length forwarded to ``generate_letter``.
        nTrials: Number of wrong guesses allowed before the round is lost.

    Returns:
        ``(guess, flag)``: the final spaced pattern and whether every letter
        was revealed before the wrong guesses ran out.
    """
    guess = "_ " * (len(answer) // 2)
    guess_clean = guess[::2].replace(" ", "")
    full_dict = full_dictionary  # module-level word list
    guessed_letters = []
    errors = 0
    turn = 0
    solved = False
    while errors < nTrials:
        c = generate_letter(full_dict, guess_clean, lm, d, guessed_letters, order)
        guessed_letters.append(c)
        if c in answer:
            # Reveal the letter at every position where it occurs.
            for pos, ch in enumerate(answer):
                if ch == c:
                    guess = guess[:pos] + c + guess[pos + 1:]
        else:
            errors += 1
        # The pattern printed here is the one from before this guess.
        print ("-------------")
        print (turn, errors, c, ':  ', guess_clean)
        print ("-------------")
        guess_clean = guess[::2]
        if '_' not in guess_clean:
            solved = True
            break
        turn += 1
    return guess, solved
def build_dictionary(dictionary_file_location):
    """Read a word list from a text file, one entry per line.

    Args:
        dictionary_file_location: Path of the file to read.

    Returns:
        A list with one string per line of the file, line endings removed.
    """
    # The context manager closes the file even if reading raises.
    with open(dictionary_file_location, "r") as text_file:
        full_dictionary = text_file.read().splitlines()
    return full_dictionary

# Driver script: load the word list, hold out 5% of it as test answers, train
# the masked n-gram model on the rest, then play one round per held-out word
# and report the share of rounds won.
full_dictionary_location = "words_250000_train.txt"
full_dictionary = build_dictionary(full_dictionary_location)
# 95% train and 5% test
# NOTE(review): train_test_split is presumably scikit-learn's; no seed is
# passed, so the split (and the reported accuracy) varies between runs.
full_dic, answers = train_test_split(full_dictionary, test_size = 0.05)
# Train on the training words joined by single spaces, with 5-character grams.
lm = train_char_lm(" ".join(full_dic), 5)
# Group the training words by length.
d=defaultdict(list)
for word in full_dic:
    d[len(word)].append(word)
N = len(answers)
success = 0
for answer in answers:
    # Convert to the spaced form used by play(): each letter followed by a space.
    answer = " ".join(answer) + " "
    res, flag = play(answer, lm, d, order = 5)
    if flag:
        success += 1
        print ("success!, the answer is " + res)
    else:
        print("failed!, the answer is " + answer + " the guess is " + res)
# Success rate as a percentage of the held-out words.
acc = success/(N*1.0)*100
print ("success rate is %0.2f%%"%acc)
# Observed success rate is about 51%.

通过这个方法，在每个单词有 6 条命（lives）的情况下，正确率大约可以提高到 50% 左右。以上是我参加 trex 机试时的上机题，有空再写详细点吧。