最近在研究用 Python 生成 Word 文档,使用的是 python-docx 模块,发现写入的内容含有中文时会报如下错误:
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
把字符串写成 Unicode 字符串(即在引号前加 u 前缀),即可规避该错误,例如:
p = document.add_paragraph(u"哈哈 ")
看到有同学在问:如果要添加的是字符串变量该怎么办?以下代码是当时写的自动生成 PbootCMS 漏洞报告的函数,其中 title_1 是网站名称(中文),经过处理后最终会写入文档。主要方法是 weak_des = u" " + title + u";" + ip_info,大家可以参考:
# Everything that is not a CJK ideograph, an ASCII digit or an ASCII letter
# is stripped from titles before they are used in the report / file name.
_TITLE_JUNK_PATTERN = u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])"

# Substrings that mark a page title as an error page rather than a site name.
_ERROR_TITLE_MARKERS = ("403", "没有", "Not", "Nginx", "503", "500", "抱歉")

_REPORT_FONT = u'宋体'

# python-docx lengths are EMU-based; 406400 EMU equals 32 pt.
_FIRST_LINE_INDENT = 406400

# Default output prefix; the date and vulnerability name are appended to it.
_DEFAULT_REPORT_ROOT = "G:\\cnvd\\exp\\cnvd_report\\"


def _report_title(url, page_title):
    """Return the sanitized page title, or ``<host>_wrong`` if it is unusable."""
    cleaned = re.sub(_TITLE_JUNK_PATTERN, "", page_title)
    # An empty result (empty title, or a title made only of stripped
    # characters) would produce a file literally named ".docx", so it is
    # treated the same way as an error-page title.
    unusable = not cleaned or any(
        marker in page_title for marker in _ERROR_TITLE_MARKERS
    )
    if not unusable:
        return cleaned
    host = parse.urlparse(url).netloc
    return re.sub(_TITLE_JUNK_PATTERN, "", host) + "_wrong"


def _apply_report_font(run):
    """Set the report font on *run* for both Latin and East Asian text."""
    # Assigning font.name first creates the rPr/rFonts elements that the
    # East Asian override below writes into.
    run.font.name = _REPORT_FONT
    run._element.rPr.rFonts.set(qn('w:eastAsia'), _REPORT_FONT)


def _add_heading(document, text, level):
    """Append a heading of the given level whose text uses the report font."""
    run = document.add_heading('', level=level).add_run(text)
    _apply_report_font(run)


def _format_body(paragraph, run):
    """Apply the body indent, line spacing, font size and font."""
    paragraph.paragraph_format.first_line_indent = _FIRST_LINE_INDENT
    paragraph.paragraph_format.line_spacing = Pt(25)
    run.font.size = Pt(12)
    _apply_report_font(run)


def pbootcms(url, vul_payload, title_1, ip_info, lujing, vulname,
             report_root=_DEFAULT_REPORT_ROOT):
    """Generate a Word report for the PbootCMS default database download issue.

    The document contains a title, a description section, a details section
    with a screenshot, and a remediation section. It is saved as
    ``<report_root><YYYY-MM-DD><vulname>\\<title>.docx``; the directory is
    created when missing.

    Args:
        url: Address of the affected site; used in the headings and, when the
            page title is unusable, as the source of the report name.
        vul_payload: Unused; kept so existing callers keep working.
        title_1: Page title of the site (may contain Chinese). Titles that
            are empty, contain an error marker such as "403" or "Not", or
            sanitize to an empty string are replaced by ``<host>_wrong``.
        ip_info: Text appended after the title in the description section.
        lujing: Path of the screenshot inserted into the details section.
        vulname: Vulnerability name, appended to the dated directory name.
        report_root: Path prefix for the output directory. It is joined by
            plain string concatenation, so it must end with a separator.
    """
    title = _report_title(url, title_1)
    print(title)
    day = datetime.datetime.now().strftime('%Y-%m-%d')

    document = Document()
    _add_heading(document, url + u"数据库文件下载 ", 0)

    # Section 1: description followed by the site title and IP information.
    _add_heading(document, u"一、漏洞描述 ", 1)
    description = "PbootCMS v2.0.7 默认数据库下载,默认的数据库路径是/data/pbootcms.db,且data目录下没有进行任何的判断,后台也没有提供修改数据库路径的功能,所以可直接下载。"
    paragraph = document.add_paragraph(description)
    run = paragraph.add_run(u" " + title + u";" + ip_info)
    _format_body(paragraph, run)
    run.font.bold = False

    # Section 2: details plus screenshot. The picture is added right after
    # the text so it lands directly below this paragraph.
    _add_heading(document, u"二、漏洞详情 ", 1)
    paragraph = document.add_paragraph()
    run = paragraph.add_run(url + "PbootCMS存在默认数据库文件下载漏洞")
    document.add_picture(lujing, width=Inches(6))
    _format_body(paragraph, run)

    # Section 3: remediation advice.
    _add_heading(document, u"三、加固建议 ", 1)
    paragraph = document.add_paragraph()
    run = paragraph.add_run("建议进行系统升级")
    _format_body(paragraph, run)

    out_dir = report_root + day + vulname
    # exist_ok avoids the check-then-create race between concurrent runs.
    os.makedirs(out_dir, exist_ok=True)
    name = out_dir + '\\' + title + '.docx'
    print(name)
    document.save(name)
    print(title + "cnvd报告生成成功!!!")
# print(title,ip_info)
# dic=['错误信息','PbootCMS','title']
# print(len(ip_info))
# Example invocation with sample arguments, listed in signature order.
pbootcms(
    url="http://www.xxx.com",
    vul_payload='hello',
    title_1="科技有限公司服务热线",
    ip_info="192.178.1.1_北京市_北京市",
    lujing="G:\\cnvd\\exp\\pic\\1bf134b0b8029460d939ab3f73c275dd.png",
    vulname="注入",
)