学生信息转换系统,doc读入,一键录入Excel表格

'''
逐个读取学生信息,转换为格式表
'''
import os
import docx
import xlrd, xlwt
from xlutils.copy import copy
import re

# Locate the Word document to convert in the current working directory.
# `fileName` is the base name without extension (e.g. "一二班信息" for
# "一二班信息.docx").  If several documents match, the last one listed wins.
fileName = None
for d in os.listdir():
    # "~$..." entries are the lock files Word creates while a document is
    # open; they are not real documents.
    if d.startswith("~$"):
        continue
    # The reader further down always opens fileName + ".docx", and python-docx
    # cannot parse the legacy binary .doc format, so .doc files are reported
    # and skipped instead of producing a confusing failure later.
    if d.endswith(".doc"):
        print("跳过旧版 .doc 文件(请先另存为 .docx)》》》》", d)
        continue
    if d.endswith(".docx"):
        fileName = d[:-len(".docx")]
        print("发现文件》》》》", fileName, "》》》展开分析")

# Fail with a clear message instead of a NameError when nothing was found.
if fileName is None:
    raise SystemExit("当前目录下未发现可处理的 .docx 文件")

def newXls(file, sheet_name, value):
    '''
    Create a fresh workbook containing a single sheet called `sheet_name`,
    write the items of `value` across row 0 as the header row, and save
    the workbook to `file`.
    '''
    book = xlwt.Workbook()
    header_sheet = book.add_sheet(sheet_name)
    for col, title in enumerate(value):
        header_sheet.write(0, col, title)
    book.save(file)
    print('create workbook and sheet successfully!')

def appendXls(file, value):
    '''
    Append one row to the first sheet of the existing workbook `file`.

    file  : path of the workbook; it is read, then overwritten in place.
    value : iterable of cell values for the new row.  Every value is
            written as a string.  Values that are None or blank after
            stripping are skipped, and the remaining values are packed
            into consecutive columns starting at column 0.
    '''
    wb = xlrd.open_workbook(file)
    nr = wb.sheet_by_index(0).nrows  # rows already present == index of the new row
    ws = copy(wb)  # xlrd workbooks are read-only; get a writable copy
    nws = ws.get_sheet(0)
    j = 0  # next free column; only advances when a cell is actually written
    for cell in value:
        # Convert before testing for blankness so non-string cells
        # (numbers, None) do not raise on .strip().
        text = "" if cell is None else str(cell)
        if text.strip():
            nws.write(nr, j, text)
            j += 1
    ws.save(file)

# Header row of the output sheet.
head = ["姓名","性别","出生年月日","学籍号","身份证号","父亲","联系方式","工作单位",
        "母亲","联系方式","工作单位","户籍地址","现住地址","所属街道","邮政编码"]

# Read the .docx and group its paragraphs into records.
# A record is the list of tokens collected from consecutive non-blank
# paragraphs; a blank paragraph ends the current record.
file = docx.Document(fileName + ".docx")
para = file.paragraphs
sep_re = re.compile('[,,; ]')  # field separators inside a paragraph
Ctx = []   # all records, one list of tokens per student
line = []  # tokens of the record currently being collected
for p in para:
    txt = p.text.strip()
    if txt:
        # Splitting on adjacent separators yields empty strings; drop them.
        line.extend(s for s in sep_re.split(txt) if s)
    elif line:
        # Blank paragraph: close the record.  Runs of blank paragraphs no
        # longer add empty records (those produced no output row anyway).
        Ctx.append(line)
        line = []
# Keep the final record even when the document does not end with a blank
# paragraph; previously it was silently dropped.
if line:
    Ctx.append(line)
    line = []

# Clean-up pass: remove field labels and punctuation from every token so
# only the values remain.  Entries are removed one after another in list
# order, so longer labels are listed before their shorter prefixes.
# NOTE(review): the entry ",“, " looks like several entries merged
# together — confirm against the intended list.
word = ["姓名","性别","出生年月日","学籍号","身份证号","父亲","联系方式","工作单位",
        "母亲","联系方式","工作单位","户籍地址","现住地址","所属街道","邮政编码",
        ":", ":", ",",",“, ","邮编", "现住地", "电话", "性:","学生", "出生日期",
        "出生年月", "出生日", "出生", ]


def _strip_words(text):
    # Remove every entry of `word` from `text`, in list order.
    for marker in word:
        text = text.replace(marker, '')
    return text


for record in Ctx:
    # Slice assignment keeps each record list object and updates it in place.
    record[:] = [_strip_words(token) for token in record]

# Write the workbook: header row first, then one row per record.
# xlwt produces the legacy binary .xls format whatever the file is called,
# so the output is named ".xls"; labelling it ".xlsx" gives a file whose
# extension does not match its content.
outFile = fileName + ".xls"
newXls(outFile, "class", head)
for line in Ctx:
    appendXls(outFile, line)

print("Final Succeed!")
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值