python exec, eval 方法整理

#coding:utf-8
import os
import time
import codecs

def get_lenght_single_word():
    '''筛选出文件中不同长度的汉字'''
    filename = r'E:\SVN\linguistic_model\9_keys\0709modify\wordlist_61633_weight.txt'
    with codecs.open(filename, encoding='utf-8') as f:
        for i in range(1, 10):
            exec 'lenght_%s = 0'%i
        for line in f.readlines():
            splited_line = line.split('\t')
            ngram_item = splited_line[0]
            lenght_of_item = len(ngram_item)
            for i in range(1, 10):
                exec 'if lenght_of_item == %d:lenght_%s+=1'%(i,i)
        for i in range(1, 10):
            exec 'print "lenght_%s is: {}".format(lenght_%s)'%(i,i)
# get_lenght_single_word()
   def combine_bigram_freq(self, uncombine_file_pattern, combine_freq_filename='combine_word_freq.txt'):
        '''将n个排序后文件的N元模型进行词频叠加'''
        combine_bigram_freq_filename = os.path.join(self.src_data_file_path, combine_freq_filename)
        com_fileObj = codecs.open(combine_bigram_freq_filename, mode='a', encoding='utf-8')
        for file_count in range(1, self.TOTAL_FILE_COUNT+1):
            #28个bigram_filename[1, 28]
            exec "bigram_filename%(bigram_filename_count)s = os.path.join(self.src_data_file_path, '%(bigram_inorder)s_{}'.format(uncombine_file_pattern))"%{'bigram_filename_count':file_count, 'bigram_inorder':file_count} in globals(), locals()
            #28个fileobj[1, 28]
            exec "fileObj%(fileObj_count)s = codecs.open(bigram_filename%(bigram_filename_count)s, encoding='utf-8')"%({'fileObj_count':file_count,'bigram_filename_count':file_count}) in globals(), locals()
        bigram_param_list = []
        for fileObj_index in range(1, self.TOTAL_FILE_COUNT+1):
            bigram_param_list.append((fileObj_index, eval('next(fileObj%s)'%fileObj_index)))
            #以fileObj的index为key,以bigram_param freq 为value生成字典
        bigram_dic = dict(bigram_param_list)
        file_count = 0
        while 1:
            #按照bigram_param进行排序,返回key(index)值组成的List
            sorted_bigram_dic_keys_list = sorted(bigram_dic.iterkeys(), key=lambda x:bigram_dic[x].split('\t')[0])
            # print sum([int(item.encode('utf-8').split('\t')[1]) for item in bigram_dic.itervalues()])
            #排序后字典内第一个元素,查找与该元素相等的元素
            if len(sorted_bigram_dic_keys_list) == 0:
                break
            first_index = sorted_bigram_dic_keys_list[0]
            first_item_in_bigram_dic_splited =  bigram_dic[first_index].split('\t')
            first_bigram_param = first_item_in_bigram_dic_splited[0]
            freq_int = int(first_item_in_bigram_dic_splited[1])
            bigram_dic.pop(first_index)
            try:
                bigram_dic[first_index] = eval('next(fileObj%s)'%first_index)
            except:
                file_count += 1
                print file_count
                if file_count == self.TOTAL_FILE_COUNT:
                    break
            for sorted_index in sorted_bigram_dic_keys_list[1:]:
                if first_bigram_param == bigram_dic[sorted_index].split('\t')[0]:
                    freq_int += int(bigram_dic[sorted_index].split('\t')[1])
                    bigram_dic.pop(sorted_index)
                    try:
                        bigram_dic[sorted_index] = eval('next(fileObj%s)'%sorted_index)
                    except:
                        file_count += 1
                        print file_count
                        if file_count == self.TOTAL_FILE_COUNT:
                            break
            com_str = '\t'.join((first_bigram_param, str(freq_int)))
            com_fileObj.write(com_str+'\n')


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值