Python 常用方法

一、
row为list对象
row = "".join(row) # Concatenate the elements of the list `row` into one string with no separator (str.join requires every element to be a str)

# Show a single-line progress indicator while iterating the CSV rows.
for row in csvreader:
    # The leading "\r" moves the cursor back to the start of the line, so each
    # update overwrites the previous one instead of scrolling the terminal.
    progress = "process:{}/{}".format(ids, all_sentences_num)
    print("\r", progress, end="", flush=True)
    ids = ids + 1
打印结果格式:process:186857/186857


二、
from collections import Counter
# Per the original note, read_input_csv1 returns a list of captions.
captions = read_input_csv1(excel_path)
# Count how many times each caption occurs; Counter is a dict subclass
# mapping element -> occurrence count.
counter = Counter(captions)
# Order the (caption, count) pairs by count, breaking ties by caption;
# reverse=True gives descending order. sorted() returns a new list and leaves
# `counter` untouched, so the result has to be bound to a name to be usable.
sorted_counts = sorted(counter.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)

三、
from openpyxl import load_workbook
def read_input_xlsx1(filename):
    """Read the first column of an xlsx sheet as whitespace-split token lists.

    The active sheet of the workbook is scanned row by row. For every row
    whose first cell holds a value with a ``split`` method (i.e. text), the
    result of ``value.split()`` is collected. Cells without ``split`` (for
    example empty cells, whose value is ``None``) are skipped.

    Args:
        filename: Path of the xlsx file to load.

    Returns:
        A list of token lists, one per collected cell, with the first ten
        collected entries dropped.
    """
    workbook = load_workbook(filename)
    # The active sheet is the first sheet by default. To read another sheet,
    # look it up by title instead, e.g. ``workbook[workbook.sheetnames[0]]``
    # (``get_sheet_names`` / ``get_sheet_by_name`` are deprecated in openpyxl).
    booksheet = workbook.active

    col_0 = []  # split contents of column 0, in row order
    for row in booksheet.rows:
        value = row[0].value
        try:
            col_0.append(value.split())
        except AttributeError:
            # Value has no .split() (e.g. None for an empty cell): skip it.
            continue
    # NOTE(review): the first ten collected entries are discarded; presumably
    # they are heading rows in the source spreadsheet - confirm with the data.
    return col_0[10:]


#输入文件为csv时读取方式
def read_input_csv(filename):
    """Read a GBK-encoded CSV file into two id-keyed lookup tables.

    The first row is treated as a header and skipped. For every following
    non-empty row, column 0 is used as the key; column 2 with all whitespace
    removed is stored in the first mapping and column 4 in the second. When
    the same key occurs more than once, the last row wins.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(id_cn_sentences, id_bd_sentences)`` of two
        ``defaultdict(str)`` objects, so looking up an unknown id yields ``""``.

    Raises:
        IndexError: If a non-empty data row has fewer than five columns.
    """
    id_cn_sentences = defaultdict(str)
    id_bd_sentences = defaultdict(str)
    # newline='' lets the csv module do its own newline handling, as the csv
    # documentation recommends for files passed to csv.reader.
    with open(filename, 'rt', encoding='gbk', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to index.
                continue
            row_id = row[0]
            # split() + join() removes every whitespace character in the cell.
            id_cn_sentences[row_id] = "".join(row[2].split())
            id_bd_sentences[row_id] = row[4]
    return id_cn_sentences, id_bd_sentences

#初始化输出的csv文件
def init_csv_file(csv_file):
    """Create a fresh CSV output file and write its header row.

    Any existing content at ``csv_file`` is discarded.

    Args:
        csv_file: Path of the CSV file to create.

    Returns:
        A tuple ``(csv_writer, file_handle)``. The file is left open so the
        caller can keep writing rows; the caller must call
        ``file_handle.close()`` when finished.
    """
    # Mode 'w' truncates an existing file, which replaces the former
    # exists()/remove()/append sequence with a single step.
    # newline='' prevents blank lines between rows; 'utf-8-sig' prepends a
    # UTF-8 byte order mark to the output.
    handle = open(csv_file, 'w', newline='', encoding='utf-8-sig')
    csv_writer = csv.writer(handle)
    csv_writer.writerow(["id", "englishText", "chineseText", "sent_score"])
    return csv_writer, handle
# Example usage: create the output file, append one data row, then close it.
coherent, f1 = init_csv_file(save_coherent)
coherent.writerow([id_, en_sentences[id_], zh_sentences[id_], score])
# close must be *called*; a bare `f1.close` only references the method and
# leaves the file open with its buffered rows unflushed.
f1.close()

#json转csv,json格式为
{"id": 278, "text": "啊...这些是我一生的果实", "score": 0.30681753139473444}
{"id": 379, "text": "我想去这里", "score": 0.3820151642892597}
{"id": 403, "text": "我想知道它说什么", "score": 0.3295139660149289}
{"id": 450, "text": "非常好的联赛徽标", "score": 0.307017321127546}
# coding:utf-8
import json
import csv
# Convert a JSON Lines file (one JSON object per line) into a CSV file.
with open('111.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    csv_writer = csv.writer(csv_file)
    # NOTE(review): the header declares three columns but every data row below
    # writes only two values (id, text) - confirm whether "modification_text"
    # is meant to be filled in.
    csv_writer.writerow(["caption_id", "chinese_text", "modification_text"])
    with open('111.json', 'r', encoding="utf-8-sig") as jsonfile:
        # Iterate the file lazily instead of loading every line with readlines().
        for line in jsonfile:
            line = line.strip()
            if not line:
                # A blank line (e.g. trailing newline) is not valid JSON; skip it.
                continue
            line_dict = json.loads(line)
            text = str(line_dict['text'])
            text_id = int(line_dict['id'])
            csv_writer.writerow([text_id, text])

四、
defaultdict接受一个工厂函数作为参数,如下来构造:
d = defaultdict(factory_function)  # 变量名不要用 dict,否则会覆盖内置类型 dict
这个 factory_function 可以是 list、set、str、int 等,作用是:当访问的 key 不存在时,自动用工厂函数的默认值创建该 key 并返回这个默认值,比如 list 对应 [],str 对应空字符串 "",set 对应 set(),int 对应 0,举例如下:

from collections import defaultdict

# One defaultdict per factory: int, set, str and list, in that order.
dict1, dict2, dict3, dict4 = (
    defaultdict(factory) for factory in (int, set, str, list)
)
# An explicitly assigned value is stored as-is; the factory is not involved.
dict1[2] = 'two'

# Looking up a key that does not exist yet inserts and returns the factory's
# default value: 0, set(), '' (prints an empty line) and [].
for mapping, missing_key in ((dict1, 3), (dict2, 1), (dict3, 1), (dict4, 1)):
    print(mapping[missing_key])

结果:
0
set()

[]

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值