csv文件拆分

# encoding=utf-8
import os
import time


def mkSubFile(lines, head, srcName, sub):
    """Write one chunk of rows to a numbered sibling file of ``srcName``.

    The output name is built by inserting ``_<sub>`` before the extension
    of ``srcName`` (for example ``data.csv`` with ``sub=1`` gives
    ``data_1.csv``).  The header is written first, then every string in
    ``lines`` exactly as given; no line terminators are added.

    Args:
        lines: Iterable of row strings to write after the header.
        head: Header line repeated at the top of the output file.
        srcName: Path of the source file; the output path is derived from it.
        sub: Index of this chunk, used as the filename suffix.

    Returns:
        ``sub + 1``, i.e. the index to use for the next chunk.
    """
    stem, extname = os.path.splitext(srcName)
    filename = stem + '_' + str(sub) + extname
    print('make file: %s' % filename)
    # The source is read as UTF-8, so write UTF-8 explicitly instead of
    # relying on the platform default, which may not round-trip the text.
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.write(head)
        fout.writelines(lines)
    return sub + 1


def splitByLineCount(filename, count):
    """Split a text file into pieces of ``count`` data lines each.

    The first line of ``filename`` is treated as a header and passed to
    ``mkSubFile`` for every piece.  The remaining lines are collected into
    chunks; each time a chunk reaches exactly ``count`` lines it is handed
    to ``mkSubFile``.  Any lines left over after the last full chunk are
    written as a final, shorter piece.  Pieces are numbered from 1.

    If a chunk never reaches exactly ``count`` lines (for example when
    ``count`` is zero or negative), all data lines end up in one piece.

    Args:
        filename: Path of the UTF-8 encoded file to split.
        count: Number of data lines per output piece.
    """
    with open(filename, encoding="utf-8") as src:
        header = src.readline()
        chunk = []
        index = 1
        for row in src:
            chunk.append(row)
            if len(chunk) != count:
                continue
            # Chunk is full: flush it and start collecting the next one.
            index = mkSubFile(chunk, header, filename, index)
            chunk = []
        if chunk:
            # Trailing partial chunk.
            mkSubFile(chunk, header, filename, index)


if __name__ == '__main__':
    # Split the sample CSV into 600-line pieces and report the wall-clock
    # time the split took, in whole seconds.
    started = time.time()
    splitByLineCount('lidar40_list.json.csv', 600)
    elapsed = time.time() - started
    print('time is %d seconds ' % elapsed)

正则匹配字符

# -*- coding: utf-8 -*-
import xlrd
import re
import json
# Scan the first sheet of result.xlsx for horizontally adjacent cell pairs,
# keep the pairs that pass both regex filters below, and dump them as a
# {left cell text: right cell text} mapping to result.json.
# NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; opening an
# .xlsx file needs xlrd < 2.0 -- confirm the installed version.
data = xlrd.open_workbook("result.xlsx")
table = data.sheets()[0]
# Leading run of characters that are not ASCII letters (presumably the
# Chinese side of a pair, going by the `cn` naming).
cn_pattern = re.compile(r"[^a-zA-Z]*")
# Leading ASCII letters, optional whitespace, then more ASCII letters.
# Raw string so that `\s` is not an invalid string escape.
en_pattern = re.compile(r"^[a-zA-Z]*\s{0,}[a-zA-Z]*")
result = {}
nrows = table.nrows
ncols = table.ncols
for i in range(nrows):
    for j in range(ncols - 1):
        cn = table.cell(i, j).value
        en = table.cell(i, j + 1).value
        # Non-text cells (e.g. numbers, which xlrd returns as floats)
        # cannot be regex-matched; skip them instead of raising TypeError.
        if not (isinstance(cn, str) and isinstance(en, str)):
            continue
        # findall always yields at least one (possibly empty) match at the
        # start of the string; a non-empty first match means the text
        # begins with the expected kind of characters.
        if cn_pattern.findall(cn)[0] != "" and en_pattern.findall(en)[0] != "":
            result[cn] = en
print(result)
with open("result.json", "w", encoding="utf-8") as f:
    json.dump(result, f, indent=4, ensure_ascii=False)
	

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值