很久没有写博客了，看着自己以前写的文章，感慨万千，都是满满的回忆。以前是做 Java 开发的，现在转成了产品经理。最近接到了一个数据整理的需求，好久没写代码了，就用 Python 处理一下吧。
需求：根据 Excel 里面的案由，到各个法律法规文件中检索关键字，把对应的法条取出来，按照特定格式输出到 Excel 文件，样式如图。
代码如下，实现了功能，就没有再优化代码了。
import docx
import xlrd
import xlwt
import re
import os
# Regex for a structural heading number such as "第十二条", "第三章" or "第2节".
# The previous pattern made everything after "第" optional, so it matched any
# text that merely contained the character "第"; this one requires a numeral
# (Chinese or Arabic) followed by a heading unit. The capture group still
# yields the numeral part.
countNum = r'第([零〇一二三四五六七八九十百千万两0-9]+)[编章节条]'
# Output workbook that collects the extracted articles; it is saved by the
# script's final statement.
excelWork = xlwt.Workbook()
# Single result sheet named "cc", created with cell_overwrite_ok=True.
# NOTE(review): "worokSheet" looks like a typo for "workSheet"; the name is
# kept because other functions in this file reference it.
worokSheet = excelWork.add_sheet("cc",cell_overwrite_ok=True)
# Index of the next result row to write. Starts at 1 because row 0 holds the
# header written by setTitleExcel().
excelRowIndex = 1
# Input locations. Backslashes are escaped explicitly: the previous literals
# relied on unrecognised escape sequences ("\行", "\A", "\法") being kept
# verbatim, which newer Python versions warn about. A raw string is not used
# for lawCatalogue because a raw literal cannot end in a single backslash.
# Spreadsheet holding the case causes to search for.
aydir = "D:\\行政法条库\\AY.xlsx"
# Folder holding the law documents; the trailing separator is kept so callers
# that concatenate a file name directly still work.
lawCatalogue = "D:\\行政法条库\\法律文书\\"
# List the law documents and return their file names.
def getLawName(directory="D:/行政法条库/法律文书"):
    """Return the names of the entries in the law-document folder.

    Args:
        directory: Folder to list. Defaults to the location this function
            previously had hard-coded, so zero-argument calls behave as
            before.

    Returns:
        The list produced by ``os.listdir``: bare entry names without the
        folder path, in whatever order the operating system reports them.
    """
    # The local used to be called ``dir``, which shadowed the builtin.
    return os.listdir(directory)
# Write the header row of the result sheet.
def setTitleExcel():
    """Fill row 0 of the module-level sheet with the four column titles.

    All four cells share one style: solid ice-blue background, bold font.
    """
    header_style = xlwt.easyxf('pattern: pattern solid, fore_colour ice_blue; font: bold on')
    column_titles = ("案由", "法律名称", "法律条款", "法律内容")
    for column, title in enumerate(column_titles):
        worokSheet.write(0, column, title, header_style)
# Build a cell style with the requested font colour and weight.
def set_color(color, bold):
    """Create a new xlwt style for a cell.

    Args:
        color: Value assigned to the font's ``colour_index``.
        bold: Value assigned to the font's ``bold`` flag.

    Returns:
        A fresh ``xlwt.XFStyle`` carrying that font and an alignment whose
        ``vert`` field is 0x01.
    """
    cell_font = xlwt.Font()
    cell_font.bold = bold
    cell_font.colour_index = color

    cell_alignment = xlwt.Alignment()
    # presumably 0x01 selects vertical centring — confirm against xlwt's constants
    cell_alignment.vert = 0x01

    cell_style = xlwt.XFStyle()
    cell_style.alignment = cell_alignment
    cell_style.font = cell_font
    return cell_style
def _load_law_paragraphs(law_dir):
    """Read every Word document in *law_dir* once.

    Returns:
        A list of ``(file_name, paragraph_texts)`` tuples, where
        ``paragraph_texts`` is the list of paragraph strings of that document.
    """
    laws = []
    for file_name in os.listdir(law_dir):
        # Only .docx files are law documents; "~$..." entries are the lock
        # files Word leaves beside an open document and are not readable.
        if file_name.startswith("~$") or not file_name.lower().endswith(".docx"):
            continue
        document = docx.Document(os.path.join(law_dir, file_name))
        laws.append((file_name, [paragraph.text for paragraph in document.paragraphs]))
    return laws


def _collect_article_text(paragraphs, ay_key, heading_re):
    """Gather the text of every article in *paragraphs* that mentions *ay_key*.

    For each paragraph containing the key, the result includes the paragraphs
    from the nearest preceding heading (when the matching paragraph has no
    heading number itself), the matching paragraph, and the following
    paragraphs up to the next one that contains a heading number. Each
    paragraph is terminated with a newline.
    """
    pieces = []
    for index, text in enumerate(paragraphs):
        if ay_key not in text:
            continue

        heading_found_above = False
        if not heading_re.search(text):
            # The match sits inside an article body: walk back to the paragraph
            # that opens the article. Only the first 10 characters are checked
            # so a number quoted later in a paragraph does not count as a
            # heading. The range stops at paragraph 0 and is empty when the
            # match itself is paragraph 0, so indices never go negative.
            for start in range(index - 1, -1, -1):
                if heading_re.search(paragraphs[start][0:10]):
                    print("案由:" + ay_key)
                    pieces.append("\n")
                    for earlier in paragraphs[start:index]:
                        print(earlier)
                        pieces.append(earlier + "\n")
                    heading_found_above = True
                    break

        if not heading_found_above:
            print("案由:" + ay_key)
        print(text)
        pieces.append(text + "\n")

        # Continuation paragraphs belong to the same article until the next
        # paragraph that carries a heading number.
        for following in paragraphs[index + 1:]:
            if heading_re.search(following):
                break
            print(following)
            pieces.append(following + "\n")
    return "".join(pieces)


def readLaw(ayContentPath,lawCatalogue):
    """Search every law document for every case cause and write the hits.

    One result row is written per (case cause, law document) pair whose
    collected text contains an article number. The row holds the case cause,
    the law name in 《》, the first article number, and the collected text
    with that leading number removed.

    Args:
        ayContentPath: Path of the spreadsheet whose first sheet lists the
            case causes, one per row (all cells of a row are concatenated).
        lawCatalogue: Folder containing the .docx law documents.

    Side effects:
        Writes to the module-level ``worokSheet``, advances the module-level
        ``excelRowIndex``, and prints the matched text to stdout.
    """
    global excelRowIndex

    heading_re = re.compile(countNum)
    # Non-greedy so a second "条" further along the line is not swallowed.
    article_re = re.compile("第.*?条")
    # One style object is shared by all result cells instead of building a new
    # one per cell; xlwt caps the number of styles a workbook may hold.
    cell_style = set_color(0x00, False)

    # NOTE(review): recent xlrd releases only read .xls files; confirm the
    # installed version can open the configured .xlsx input.
    sheet = xlrd.open_workbook(ayContentPath).sheets()[0]
    # Parse each document once rather than once per case-cause row.
    laws = _load_law_paragraphs(lawCatalogue)

    worokSheet.col(0).width = 256 * 30
    worokSheet.col(1).width = 256 * 40
    worokSheet.col(2).width = 256 * 30
    worokSheet.col(3).width = 256 * 100

    for rownum in range(sheet.nrows):
        # str() keeps numeric cells (returned as floats) from breaking the join.
        ayKey = "".join(str(cell) for cell in sheet.row_values(rownum))
        if not ayKey.strip():
            # An empty key is "in" every string and would match every paragraph.
            continue

        for file_name, paragraphs in laws:
            lawcontent = _collect_article_text(paragraphs, ayKey, heading_re)
            if not article_re.search(lawcontent):
                continue

            # Prefer an article number near the start of the collected text,
            # falling back to the first one anywhere so a late number cannot
            # yield None.
            number_match = article_re.search(lawcontent[0:18]) or article_re.search(lawcontent)
            lawCount = number_match.group(0)

            body = lawcontent.strip()
            # Remove the number as a real prefix; str.lstrip would strip a
            # character set and could eat the beginning of the article text.
            if body.startswith(lawCount):
                body = body[len(lawCount):]

            # splitext drops only the extension; rstrip(".docx") would also
            # strip trailing 'd', 'o', 'c', 'x' and '.' characters of the name.
            law_name = os.path.splitext(file_name)[0]

            worokSheet.write(excelRowIndex, 0, ayKey, cell_style)
            worokSheet.write(excelRowIndex, 1, "《" + law_name + "》", cell_style)
            worokSheet.write(excelRowIndex, 2, lawCount, cell_style)
            worokSheet.write(excelRowIndex, 3, body, cell_style)
            excelRowIndex = excelRowIndex + 1
# Script entry point: write the header row, fill the sheet from the case-cause
# spreadsheet and the law documents, then save the workbook as "result1.xls"
# (a relative path, so it lands in the current working directory).
setTitleExcel()
readLaw(aydir,lawCatalogue)
excelWork.save("result1.xls")
文件路径：
需要替换时，直接把代码里的文件路径替换成自己的路径就好。
好了,接下来把这个习惯捡起来,不了解开发的需求分析不是好的产品经理,嘿嘿。